{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1376,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0007267441860465116, "grad_norm": 9.074579620427265, "learning_rate": 0.0, "loss": 2.5726, "step": 1},
    {"epoch": 0.0014534883720930232, "grad_norm": 9.325884168450177, "learning_rate": 2.0000000000000002e-07, "loss": 2.5915, "step": 2},
    {"epoch": 0.002180232558139535, "grad_norm": 9.577883515575813, "learning_rate": 4.0000000000000003e-07, "loss": 2.5166, "step": 3},
    {"epoch": 0.0029069767441860465, "grad_norm": 9.081960014952461, "learning_rate": 6.000000000000001e-07, "loss": 2.5275, "step": 4},
    {"epoch": 0.003633720930232558, "grad_norm": 9.146732295895594, "learning_rate": 8.000000000000001e-07, "loss": 2.6068, "step": 5},
    {"epoch": 0.00436046511627907, "grad_norm": 8.878352797679302, "learning_rate": 1.0000000000000002e-06, "loss": 2.5602, "step": 6},
    {"epoch": 0.005087209302325582, "grad_norm": 8.7892387976441, "learning_rate": 1.2000000000000002e-06, "loss": 2.5768, "step": 7},
    {"epoch": 0.005813953488372093, "grad_norm": 8.439946529584597, "learning_rate": 1.4000000000000001e-06, "loss": 2.571, "step": 8},
    {"epoch": 0.006540697674418605, "grad_norm": 7.525421260655473, "learning_rate": 1.6000000000000001e-06, "loss": 2.5806, "step": 9},
    {"epoch": 0.007267441860465116, "grad_norm": 7.420995687542313, "learning_rate": 1.8000000000000001e-06, "loss": 2.5632, "step": 10},
    {"epoch": 0.007994186046511628, "grad_norm": 6.529233432613504, "learning_rate": 2.0000000000000003e-06, "loss": 2.5702, "step": 11},
    {"epoch": 0.00872093023255814, "grad_norm": 5.076235841533463, "learning_rate": 2.2e-06, "loss": 2.4557, "step": 12},
    {"epoch": 0.00944767441860465, "grad_norm": 4.530065999910271, "learning_rate": 2.4000000000000003e-06, "loss": 2.4625, "step": 13},
    {"epoch": 0.010174418604651164, "grad_norm": 3.757971086858522, "learning_rate": 2.6e-06, "loss": 2.5126, "step": 14},
    {"epoch": 0.010901162790697675, "grad_norm": 3.0710603482243517, "learning_rate": 2.8000000000000003e-06, "loss": 2.4419, "step": 15},
    {"epoch": 0.011627906976744186, "grad_norm": 2.8230945104887333, "learning_rate": 3e-06, "loss": 2.437, "step": 16},
    {"epoch": 0.012354651162790697, "grad_norm": 2.6279435114027168, "learning_rate": 3.2000000000000003e-06, "loss": 2.3592, "step": 17},
    {"epoch": 0.01308139534883721, "grad_norm": 2.5161015684914774, "learning_rate": 3.4000000000000005e-06, "loss": 2.3957, "step": 18},
    {"epoch": 0.013808139534883721, "grad_norm": 2.4278917076308475, "learning_rate": 3.6000000000000003e-06, "loss": 2.3352, "step": 19},
    {"epoch": 0.014534883720930232, "grad_norm": 2.348998890488855, "learning_rate": 3.8000000000000005e-06, "loss": 2.4372, "step": 20},
    {"epoch": 0.015261627906976744, "grad_norm": 2.4780736413377396, "learning_rate": 4.000000000000001e-06, "loss": 2.3838, "step": 21},
    {"epoch": 0.015988372093023256, "grad_norm": 2.5057556166741604, "learning_rate": 4.2000000000000004e-06, "loss": 2.4095, "step": 22},
    {"epoch": 0.016715116279069766, "grad_norm": 2.3191989107995648, "learning_rate": 4.4e-06, "loss": 2.3799, "step": 23},
    {"epoch": 0.01744186046511628, "grad_norm": 2.187761956889251, "learning_rate": 4.600000000000001e-06, "loss": 2.4215, "step": 24},
    {"epoch": 0.018168604651162792, "grad_norm": 1.670881599245353, "learning_rate": 4.800000000000001e-06, "loss": 2.395, "step": 25},
    {"epoch": 0.0188953488372093, "grad_norm": 1.5365760213246764, "learning_rate": 5e-06, "loss": 2.4282, "step": 26},
    {"epoch": 0.019622093023255814, "grad_norm": 1.657846538045198, "learning_rate": 5.2e-06, "loss": 2.4198, "step": 27},
    {"epoch": 0.020348837209302327, "grad_norm": 1.5638927670066207, "learning_rate": 5.400000000000001e-06, "loss": 2.3976, "step": 28},
    {"epoch": 0.021075581395348836, "grad_norm": 1.5407040415402946, "learning_rate": 5.600000000000001e-06, "loss": 2.4409, "step": 29},
    {"epoch": 0.02180232558139535, "grad_norm": 1.4066346512940076, "learning_rate": 5.8e-06, "loss": 2.3676, "step": 30},
    {"epoch": 0.02252906976744186, "grad_norm": 1.3673414382471556, "learning_rate": 6e-06, "loss": 2.3971, "step": 31},
    {"epoch": 0.023255813953488372, "grad_norm": 1.495128974221532, "learning_rate": 6.200000000000001e-06, "loss": 2.4844, "step": 32},
    {"epoch": 0.023982558139534885, "grad_norm": 2.2845356742596397, "learning_rate": 6.4000000000000006e-06, "loss": 2.3814, "step": 33},
    {"epoch": 0.024709302325581394, "grad_norm": 1.443466209692566, "learning_rate": 6.600000000000001e-06, "loss": 2.4134, "step": 34},
    {"epoch": 0.025436046511627907, "grad_norm": 1.7492603796870134, "learning_rate": 6.800000000000001e-06, "loss": 2.3634, "step": 35},
    {"epoch": 0.02616279069767442, "grad_norm": 1.6767321941082, "learning_rate": 7e-06, "loss": 2.4547, "step": 36},
    {"epoch": 0.02688953488372093, "grad_norm": 1.4015556883781917, "learning_rate": 7.2000000000000005e-06, "loss": 2.3875, "step": 37},
    {"epoch": 0.027616279069767442, "grad_norm": 1.3729850201136446, "learning_rate": 7.4e-06, "loss": 2.3611, "step": 38},
    {"epoch": 0.028343023255813952, "grad_norm": 1.4151893697508262, "learning_rate": 7.600000000000001e-06, "loss": 2.3659, "step": 39},
    {"epoch": 0.029069767441860465, "grad_norm": 1.475208153189891, "learning_rate": 7.800000000000002e-06, "loss": 2.371, "step": 40},
    {"epoch": 0.029796511627906978, "grad_norm": 1.5523241828124394, "learning_rate": 8.000000000000001e-06, "loss": 2.3868, "step": 41},
    {"epoch": 0.030523255813953487, "grad_norm": 1.3127492176118258, "learning_rate": 8.2e-06, "loss": 2.4054, "step": 42},
    {"epoch": 0.03125, "grad_norm": 1.2689101037621893, "learning_rate": 8.400000000000001e-06, "loss": 2.3864, "step": 43},
    {"epoch": 0.03197674418604651, "grad_norm": 1.335611750054927, "learning_rate": 8.6e-06, "loss": 2.442, "step": 44},
    {"epoch": 0.032703488372093026, "grad_norm": 1.2452087861474146, "learning_rate": 8.8e-06, "loss": 2.3697, "step": 45},
    {"epoch": 0.03343023255813953, "grad_norm": 1.4439605178224628, "learning_rate": 9e-06, "loss": 2.3734, "step": 46},
    {"epoch": 0.034156976744186045, "grad_norm": 1.2927603151316898, "learning_rate": 9.200000000000002e-06, "loss": 2.4011, "step": 47},
    {"epoch": 0.03488372093023256, "grad_norm": 1.185649482589594, "learning_rate": 9.4e-06, "loss": 2.3013, "step": 48},
    {"epoch": 0.03561046511627907, "grad_norm": 1.6520257001233214, "learning_rate": 9.600000000000001e-06, "loss": 2.3405, "step": 49},
    {"epoch": 0.036337209302325583, "grad_norm": 1.415240980367823, "learning_rate": 9.800000000000001e-06, "loss": 2.3749, "step": 50},
    {"epoch": 0.037063953488372096, "grad_norm": 1.1599368922180915, "learning_rate": 1e-05, "loss": 2.3551, "step": 51},
    {"epoch": 0.0377906976744186, "grad_norm": 1.3457151082199426, "learning_rate": 9.9999859669361e-06, "loss": 2.3236, "step": 52},
    {"epoch": 0.038517441860465115, "grad_norm": 1.3225280521166263, "learning_rate": 9.999943867823174e-06, "loss": 2.3406, "step": 53},
    {"epoch": 0.03924418604651163, "grad_norm": 1.3072339789726655, "learning_rate": 9.999873702897528e-06, "loss": 2.3821, "step": 54},
    {"epoch": 0.03997093023255814, "grad_norm": 1.4674401083737256, "learning_rate": 9.999775472553019e-06, "loss": 2.3389, "step": 55},
    {"epoch": 0.040697674418604654, "grad_norm": 7.364273715054014, "learning_rate": 9.999649177341036e-06, "loss": 2.3676, "step": 56},
    {"epoch": 0.04142441860465116, "grad_norm": 1.4164513349686614, "learning_rate": 9.999494817970498e-06, "loss": 2.3726, "step": 57},
    {"epoch": 0.04215116279069767, "grad_norm": 1.216986905939288, "learning_rate": 9.999312395307861e-06, "loss": 2.3642, "step": 58},
    {"epoch": 0.042877906976744186, "grad_norm": 1.0885159567206724, "learning_rate": 9.999101910377107e-06, "loss": 2.3726, "step": 59},
    {"epoch": 0.0436046511627907, "grad_norm": 1.2741330951835304, "learning_rate": 9.998863364359734e-06, "loss": 2.3776, "step": 60},
    {"epoch": 0.04433139534883721, "grad_norm": 1.1966086656900674, "learning_rate": 9.998596758594752e-06, "loss": 2.4317, "step": 61},
    {"epoch": 0.04505813953488372, "grad_norm": 1.336165326700706, "learning_rate": 9.998302094578685e-06, "loss": 2.3995, "step": 62},
    {"epoch": 0.04578488372093023, "grad_norm": 1.1879637364009348, "learning_rate": 9.997979373965542e-06, "loss": 2.3422, "step": 63},
    {"epoch": 0.046511627906976744, "grad_norm": 1.1912341449378812, "learning_rate": 9.99762859856683e-06, "loss": 2.4091, "step": 64},
    {"epoch": 0.047238372093023256, "grad_norm": 1.2930140030336172, "learning_rate": 9.997249770351531e-06, "loss": 2.3575, "step": 65},
    {"epoch": 0.04796511627906977, "grad_norm": 1.194601702985452, "learning_rate": 9.996842891446092e-06, "loss": 2.3894, "step": 66},
    {"epoch": 0.04869186046511628, "grad_norm": 1.2151890579244191, "learning_rate": 9.996407964134416e-06, "loss": 2.3649, "step": 67},
    {"epoch": 0.04941860465116279, "grad_norm": 1.2265501897788895, "learning_rate": 9.995944990857848e-06, "loss": 2.3229, "step": 68},
    {"epoch": 0.0501453488372093, "grad_norm": 1.330613412551332, "learning_rate": 9.995453974215164e-06, "loss": 2.3961, "step": 69},
    {"epoch": 0.050872093023255814, "grad_norm": 1.278238260413937, "learning_rate": 9.994934916962547e-06, "loss": 2.4385, "step": 70},
    {"epoch": 0.05159883720930233, "grad_norm": 1.1586366855530064, "learning_rate": 9.994387822013586e-06, "loss": 2.3732, "step": 71},
    {"epoch": 0.05232558139534884, "grad_norm": 1.217932106034384, "learning_rate": 9.993812692439247e-06, "loss": 2.3922, "step": 72},
    {"epoch": 0.053052325581395346, "grad_norm": 1.1455244475688116, "learning_rate": 9.99320953146786e-06, "loss": 2.3852, "step": 73},
    {"epoch": 0.05377906976744186, "grad_norm": 1.1743872650502356, "learning_rate": 9.992578342485107e-06, "loss": 2.3637, "step": 74},
    {"epoch": 0.05450581395348837, "grad_norm": 1.2681205469149612, "learning_rate": 9.991919129033994e-06, "loss": 2.4364, "step": 75},
    {"epoch": 0.055232558139534885, "grad_norm": 1.2522437864600409, "learning_rate": 9.99123189481483e-06, "loss": 2.3807, "step": 76},
    {"epoch": 0.0559593023255814, "grad_norm": 1.1756390549643723, "learning_rate": 9.990516643685222e-06, "loss": 2.323, "step": 77},
    {"epoch": 0.056686046511627904, "grad_norm": 1.388760289434306, "learning_rate": 9.98977337966003e-06, "loss": 2.3559, "step": 78},
    {"epoch": 0.057412790697674417, "grad_norm": 1.2560913594757854, "learning_rate": 9.989002106911368e-06, "loss": 2.3606, "step": 79},
    {"epoch": 0.05813953488372093, "grad_norm": 1.354056708770636, "learning_rate": 9.988202829768562e-06, "loss": 2.3182, "step": 80},
    {"epoch": 0.05886627906976744, "grad_norm": 1.5428329715782925, "learning_rate": 9.987375552718133e-06, "loss": 2.3682, "step": 81},
    {"epoch": 0.059593023255813955, "grad_norm": 1.821989967160763, "learning_rate": 9.986520280403775e-06, "loss": 2.3528, "step": 82},
    {"epoch": 0.06031976744186047, "grad_norm": 1.2376733004557532, "learning_rate": 9.985637017626326e-06, "loss": 2.3695, "step": 83},
    {"epoch": 0.061046511627906974, "grad_norm": 1.173034231871182, "learning_rate": 9.984725769343737e-06, "loss": 2.279, "step": 84},
    {"epoch": 0.06177325581395349, "grad_norm": 1.1918956966775534, "learning_rate": 9.983786540671052e-06, "loss": 2.3667, "step": 85},
    {"epoch": 0.0625, "grad_norm": 1.2273572948900426, "learning_rate": 9.982819336880369e-06, "loss": 2.3373, "step": 86},
    {"epoch": 0.06322674418604651, "grad_norm": 1.1231561709191837, "learning_rate": 9.981824163400827e-06, "loss": 2.3194, "step": 87},
    {"epoch": 0.06395348837209303, "grad_norm": 1.202004965525141, "learning_rate": 9.980801025818556e-06, "loss": 2.3587, "step": 88},
    {"epoch": 0.06468023255813954, "grad_norm": 1.2282314740315383, "learning_rate": 9.979749929876658e-06, "loss": 2.3797, "step": 89},
    {"epoch": 0.06540697674418605, "grad_norm": 1.3144649205068073, "learning_rate": 9.978670881475173e-06, "loss": 2.4046, "step": 90},
    {"epoch": 0.06613372093023256, "grad_norm": 1.3880094986440303, "learning_rate": 9.977563886671043e-06, "loss": 2.3486, "step": 91},
    {"epoch": 0.06686046511627906, "grad_norm": 1.327629914175587, "learning_rate": 9.976428951678077e-06, "loss": 2.3443, "step": 92},
    {"epoch": 0.06758720930232558, "grad_norm": 1.4219965859285533, "learning_rate": 9.975266082866923e-06, "loss": 2.4261, "step": 93},
    {"epoch": 0.06831395348837209, "grad_norm": 1.2182939431389077, "learning_rate": 9.974075286765027e-06, "loss": 2.3691, "step": 94},
    {"epoch": 0.0690406976744186, "grad_norm": 1.1061466662626884, "learning_rate": 9.972856570056594e-06, "loss": 2.3745, "step": 95},
    {"epoch": 0.06976744186046512, "grad_norm": 1.1094985189262911, "learning_rate": 9.971609939582556e-06, "loss": 2.3647, "step": 96},
    {"epoch": 0.07049418604651163, "grad_norm": 1.1428377874275164, "learning_rate": 9.970335402340534e-06, "loss": 2.326, "step": 97},
    {"epoch": 0.07122093023255814, "grad_norm": 1.2916043783430748, "learning_rate": 9.969032965484789e-06, "loss": 2.4184, "step": 98},
    {"epoch": 0.07194767441860465, "grad_norm": 1.1245599922593577, "learning_rate": 9.967702636326195e-06, "loss": 2.3263, "step": 99},
    {"epoch": 0.07267441860465117, "grad_norm": 1.1704821318393857, "learning_rate": 9.96634442233219e-06, "loss": 2.3092, "step": 100},
    {"epoch": 0.07340116279069768, "grad_norm": 1.4100861747980609, "learning_rate": 9.964958331126735e-06, "loss": 2.3648, "step": 101},
    {"epoch": 0.07412790697674419, "grad_norm": 1.2225339934583712, "learning_rate": 9.96354437049027e-06, "loss": 2.3505, "step": 102},
    {"epoch": 0.07485465116279069, "grad_norm": 1.1411365305964143, "learning_rate": 9.96210254835968e-06, "loss": 2.3503, "step": 103},
    {"epoch": 0.0755813953488372, "grad_norm": 1.1283411831087642, "learning_rate": 9.960632872828233e-06, "loss": 2.3763, "step": 104},
    {"epoch": 0.07630813953488372, "grad_norm": 1.4669332760539098, "learning_rate": 9.959135352145552e-06, "loss": 2.3131, "step": 105},
    {"epoch": 0.07703488372093023, "grad_norm": 1.1790811219137445, "learning_rate": 9.957609994717559e-06, "loss": 2.3473, "step": 106},
    {"epoch": 0.07776162790697674, "grad_norm": 1.1041919220536518, "learning_rate": 9.956056809106426e-06, "loss": 2.2556, "step": 107},
    {"epoch": 0.07848837209302326, "grad_norm": 1.3219326629276105, "learning_rate": 9.954475804030539e-06, "loss": 2.3901, "step": 108},
    {"epoch": 0.07921511627906977, "grad_norm": 1.1045288858846065, "learning_rate": 9.952866988364431e-06, "loss": 2.3432, "step": 109},
    {"epoch": 0.07994186046511628, "grad_norm": 1.2740973453849955, "learning_rate": 9.95123037113875e-06, "loss": 2.4174, "step": 110},
    {"epoch": 0.0806686046511628, "grad_norm": 1.192838557735101, "learning_rate": 9.9495659615402e-06, "loss": 2.378, "step": 111},
    {"epoch": 0.08139534883720931, "grad_norm": 1.1316106615191093, "learning_rate": 9.947873768911483e-06, "loss": 2.3669, "step": 112},
    {"epoch": 0.08212209302325581, "grad_norm": 1.1953371633708236, "learning_rate": 9.946153802751257e-06, "loss": 2.3339, "step": 113},
    {"epoch": 0.08284883720930232, "grad_norm": 1.1589422598242156, "learning_rate": 9.944406072714086e-06, "loss": 2.3633, "step": 114},
    {"epoch": 0.08357558139534883, "grad_norm": 1.1665993258833627, "learning_rate": 9.942630588610368e-06, "loss": 2.3718, "step": 115},
    {"epoch": 0.08430232558139535, "grad_norm": 1.2252914448806391, "learning_rate": 9.940827360406297e-06, "loss": 2.3757, "step": 116},
    {"epoch": 0.08502906976744186, "grad_norm": 1.99966100562339, "learning_rate": 9.938996398223802e-06, "loss": 2.3849, "step": 117},
    {"epoch": 0.08575581395348837, "grad_norm": 1.2354896948177605, "learning_rate": 9.937137712340483e-06, "loss": 2.3991, "step": 118},
    {"epoch": 0.08648255813953488, "grad_norm": 1.246354759029233, "learning_rate": 9.935251313189564e-06, "loss": 2.4023, "step": 119},
    {"epoch": 0.0872093023255814, "grad_norm": 1.2116770658459601, "learning_rate": 9.933337211359833e-06, "loss": 2.3887, "step": 120},
    {"epoch": 0.08793604651162791, "grad_norm": 1.2249695456790302, "learning_rate": 9.931395417595568e-06, "loss": 2.373, "step": 121},
    {"epoch": 0.08866279069767442, "grad_norm": 1.0642820314257344, "learning_rate": 9.929425942796502e-06, "loss": 2.3351, "step": 122},
    {"epoch": 0.08938953488372094, "grad_norm": 1.2743502150368227, "learning_rate": 9.927428798017738e-06, "loss": 2.3391, "step": 123},
    {"epoch": 0.09011627906976744, "grad_norm": 1.3340646666078864, "learning_rate": 9.925403994469702e-06, "loss": 2.4218, "step": 124},
    {"epoch": 0.09084302325581395, "grad_norm": 1.566724790718566, "learning_rate": 9.92335154351807e-06, "loss": 2.3399, "step": 125},
    {"epoch": 0.09156976744186046, "grad_norm": 1.0741534822566858, "learning_rate": 9.921271456683716e-06, "loss": 2.4141, "step": 126},
    {"epoch": 0.09229651162790697, "grad_norm": 1.1975939734272736, "learning_rate": 9.919163745642633e-06, "loss": 2.3331, "step": 127},
    {"epoch": 0.09302325581395349, "grad_norm": 1.26408679935078, "learning_rate": 9.91702842222588e-06, "loss": 2.3798, "step": 128},
    {"epoch": 0.09375, "grad_norm": 1.2052694508772792, "learning_rate": 9.91486549841951e-06, "loss": 2.3746, "step": 129},
    {"epoch": 0.09447674418604651, "grad_norm": 1.1489590029014498, "learning_rate": 9.912674986364502e-06, "loss": 2.3986, "step": 130},
    {"epoch": 0.09520348837209303, "grad_norm": 1.2440161112749069, "learning_rate": 9.91045689835669e-06, "loss": 2.3182, "step": 131},
    {"epoch": 0.09593023255813954, "grad_norm": 1.2574929722039276, "learning_rate": 9.908211246846708e-06, "loss": 2.3595, "step": 132},
    {"epoch": 0.09665697674418605, "grad_norm": 1.1422750235883177, "learning_rate": 9.905938044439904e-06, "loss": 2.3402, "step": 133},
    {"epoch": 0.09738372093023256, "grad_norm": 1.1320332374781519, "learning_rate": 9.903637303896272e-06, "loss": 2.3584, "step": 134},
    {"epoch": 0.09811046511627906, "grad_norm": 1.182873125632428, "learning_rate": 9.901309038130392e-06, "loss": 2.3266, "step": 135},
    {"epoch": 0.09883720930232558, "grad_norm": 1.155380615425545, "learning_rate": 9.89895326021134e-06, "loss": 2.3379, "step": 136},
    {"epoch": 0.09956395348837209, "grad_norm": 1.0407717760927417, "learning_rate": 9.896569983362632e-06, "loss": 2.3071, "step": 137},
    {"epoch": 0.1002906976744186, "grad_norm": 1.1815278271760303, "learning_rate": 9.894159220962138e-06, "loss": 2.3706, "step": 138},
    {"epoch": 0.10101744186046512, "grad_norm": 1.7394010037009506, "learning_rate": 9.891720986542011e-06, "loss": 2.2908, "step": 139},
    {"epoch": 0.10174418604651163, "grad_norm": 1.247501930958456, "learning_rate": 9.889255293788613e-06, "loss": 2.2802, "step": 140},
    {"epoch": 0.10247093023255814, "grad_norm": 1.376771705154438, "learning_rate": 9.886762156542428e-06, "loss": 2.3189, "step": 141},
    {"epoch": 0.10319767441860465, "grad_norm": 1.0539145361934386, "learning_rate": 9.884241588798004e-06, "loss": 2.3588, "step": 142},
    {"epoch": 0.10392441860465117, "grad_norm": 1.117473895172851, "learning_rate": 9.881693604703853e-06, "loss": 2.3647, "step": 143},
    {"epoch": 0.10465116279069768, "grad_norm": 1.1230253756584534, "learning_rate": 9.879118218562384e-06, "loss": 2.3987, "step": 144},
    {"epoch": 0.10537790697674419, "grad_norm": 1.0869199427758438, "learning_rate": 9.876515444829822e-06, "loss": 2.3608, "step": 145},
    {"epoch": 0.10610465116279069, "grad_norm": 1.175972616599383, "learning_rate": 9.873885298116123e-06, "loss": 2.3339, "step": 146},
    {"epoch": 0.1068313953488372, "grad_norm": 1.1287432192217346, "learning_rate": 9.871227793184893e-06, "loss": 2.3134, "step": 147},
    {"epoch": 0.10755813953488372, "grad_norm": 1.455085174866513, "learning_rate": 9.868542944953304e-06, "loss": 2.3109, "step": 148},
    {"epoch": 0.10828488372093023, "grad_norm": 1.175215614103579, "learning_rate": 9.865830768492019e-06, "loss": 2.3118, "step": 149},
    {"epoch": 0.10901162790697674, "grad_norm": 1.1557846646815062, "learning_rate": 9.863091279025095e-06, "loss": 2.2978, "step": 150},
    {"epoch": 0.10973837209302326, "grad_norm": 1.1458120293970389, "learning_rate": 9.860324491929905e-06, "loss": 2.3072, "step": 151},
    {"epoch": 0.11046511627906977, "grad_norm": 1.3189900091924425, "learning_rate": 9.857530422737045e-06, "loss": 2.3174, "step": 152},
    {"epoch": 0.11119186046511628, "grad_norm": 1.1143078767141845, "learning_rate": 9.854709087130261e-06, "loss": 2.3583, "step": 153},
    {"epoch": 0.1119186046511628, "grad_norm": 1.1170390691615044, "learning_rate": 9.851860500946342e-06, "loss": 2.3226, "step": 154},
    {"epoch": 0.11264534883720931, "grad_norm": 1.1117837847322765, "learning_rate": 9.848984680175049e-06, "loss": 2.3086, "step": 155},
    {"epoch": 0.11337209302325581, "grad_norm": 1.5822415450769225, "learning_rate": 9.846081640959008e-06, "loss": 2.3525, "step": 156},
    {"epoch": 0.11409883720930232, "grad_norm": 1.4781679250339181, "learning_rate": 9.843151399593636e-06, "loss": 2.3371, "step": 157},
    {"epoch": 0.11482558139534883, "grad_norm": 1.1460798573233948, "learning_rate": 9.840193972527037e-06, "loss": 2.238, "step": 158},
    {"epoch": 0.11555232558139535, "grad_norm": 1.2596341406617226, "learning_rate": 9.837209376359918e-06, "loss": 2.3571, "step": 159},
    {"epoch": 0.11627906976744186, "grad_norm": 1.1138886353351996, "learning_rate": 9.834197627845488e-06, "loss": 2.3476, "step": 160},
    {"epoch": 0.11700581395348837, "grad_norm": 1.141413584651315, "learning_rate": 9.831158743889373e-06, "loss": 2.3456, "step": 161},
    {"epoch": 0.11773255813953488, "grad_norm": 1.1691072792164279, "learning_rate": 9.828092741549513e-06, "loss": 2.3417, "step": 162},
    {"epoch": 0.1184593023255814, "grad_norm": 1.2184258738041083, "learning_rate": 9.82499963803607e-06, "loss": 2.267, "step": 163},
    {"epoch": 0.11918604651162791, "grad_norm": 1.1576173045243061, "learning_rate": 9.821879450711336e-06, "loss": 2.3378, "step": 164},
    {"epoch": 0.11991279069767442, "grad_norm": 1.0187703916093862, "learning_rate": 9.81873219708962e-06, "loss": 2.3258, "step": 165},
    {"epoch": 0.12063953488372094, "grad_norm": 1.1463683129424287, "learning_rate": 9.815557894837171e-06, "loss": 2.3341, "step": 166},
    {"epoch": 0.12136627906976744, "grad_norm": 1.1232938117798668, "learning_rate": 9.81235656177206e-06, "loss": 2.3387, "step": 167},
    {"epoch": 0.12209302325581395, "grad_norm": 1.448813798580779, "learning_rate": 9.809128215864096e-06, "loss": 2.3542, "step": 168},
    {"epoch": 0.12281976744186046, "grad_norm": 1.2584761113731575, "learning_rate": 9.80587287523471e-06, "loss": 2.328, "step": 169},
    {"epoch": 0.12354651162790697, "grad_norm": 1.1315037986766692, "learning_rate": 9.802590558156863e-06, "loss": 2.3566, "step": 170},
    {"epoch": 0.12427325581395349, "grad_norm": 1.124447003450953, "learning_rate": 9.79928128305494e-06, "loss": 2.3846, "step": 171},
    {"epoch": 0.125, "grad_norm": 1.3268576225981397, "learning_rate": 9.795945068504654e-06, "loss": 2.3277, "step": 172},
    {"epoch": 0.1257267441860465, "grad_norm": 1.104840140367407, "learning_rate": 9.792581933232924e-06, "loss": 2.3476, "step": 173},
    {"epoch": 0.12645348837209303, "grad_norm": 1.2928119161216733, "learning_rate": 9.789191896117786e-06, "loss": 2.3244, "step": 174},
    {"epoch": 0.12718023255813954, "grad_norm": 1.2396436814423948, "learning_rate": 9.78577497618829e-06, "loss": 2.353, "step": 175},
    {"epoch": 0.12790697674418605, "grad_norm": 1.103208759457441, "learning_rate": 9.782331192624372e-06, "loss": 2.383, "step": 176},
    {"epoch": 0.12863372093023256, "grad_norm": 2.0646273194959797, "learning_rate": 9.778860564756769e-06, "loss": 2.3285, "step": 177},
    {"epoch": 0.12936046511627908, "grad_norm": 1.1841117516704331, "learning_rate": 9.775363112066897e-06, "loss": 2.3602, "step": 178},
    {"epoch": 0.1300872093023256, "grad_norm": 1.465105518576992, "learning_rate": 9.771838854186748e-06, "loss": 2.3446, "step": 179},
    {"epoch": 0.1308139534883721, "grad_norm": 1.2274769216909944, "learning_rate": 9.768287810898773e-06, "loss": 2.3527, "step": 180},
    {"epoch": 0.13154069767441862, "grad_norm": 1.778850283425946, "learning_rate": 9.764710002135784e-06, "loss": 2.4089, "step": 181},
    {"epoch": 0.13226744186046513, "grad_norm": 1.2955538543537204, "learning_rate": 9.761105447980824e-06, "loss": 2.3727, "step": 182},
    {"epoch": 0.13299418604651161, "grad_norm": 1.8038989790897118, "learning_rate": 9.757474168667072e-06, "loss": 2.3779, "step": 183},
    {"epoch": 0.13372093023255813, "grad_norm": 1.4246329229100207, "learning_rate": 9.753816184577715e-06, "loss": 2.3381, "step": 184},
    {"epoch": 0.13444767441860464, "grad_norm": 1.5788899090032442, "learning_rate": 9.750131516245844e-06, "loss": 2.2993, "step": 185},
    {"epoch": 0.13517441860465115, "grad_norm": 1.485106172355965, "learning_rate": 9.746420184354334e-06, "loss": 2.3213, "step": 186},
    {"epoch": 0.13590116279069767, "grad_norm": 1.424583265290273, "learning_rate": 9.742682209735727e-06, "loss": 2.3239, "step": 187},
    {"epoch": 0.13662790697674418, "grad_norm": 1.3327881052111739, "learning_rate": 9.738917613372121e-06, "loss": 2.3294, "step": 188},
    {"epoch": 0.1373546511627907, "grad_norm": 1.28657064764393, "learning_rate": 9.73512641639504e-06, "loss": 2.3268, "step": 189},
    {"epoch": 0.1380813953488372, "grad_norm": 1.325094868563775, "learning_rate": 9.731308640085329e-06, "loss": 2.3798, "step": 190},
    {"epoch": 0.13880813953488372, "grad_norm": 1.417161190490927, "learning_rate": 9.72746430587303e-06, "loss": 2.3363, "step": 191},
    {"epoch": 0.13953488372093023, "grad_norm": 1.44872426301407, "learning_rate": 9.723593435337252e-06, "loss": 2.3349, "step": 192},
    {"epoch": 0.14026162790697674, "grad_norm": 1.5448029589257748, "learning_rate": 9.719696050206072e-06, "loss": 2.3632, "step": 193},
    {"epoch": 0.14098837209302326, "grad_norm": 1.5859763359793375, "learning_rate": 9.715772172356388e-06, "loss": 2.2827, "step": 194},
    {"epoch": 0.14171511627906977, "grad_norm": 1.184747752110052, "learning_rate": 9.711821823813812e-06, "loss": 2.3393, "step": 195},
    {"epoch": 0.14244186046511628, "grad_norm": 1.4867726022411942, "learning_rate": 9.70784502675254e-06, "loss": 2.3275, "step": 196},
    {"epoch": 0.1431686046511628, "grad_norm": 1.1538729596914152, "learning_rate": 9.703841803495234e-06, "loss": 2.3359, "step": 197},
    {"epoch": 0.1438953488372093, "grad_norm": 1.6929885708609627, "learning_rate": 9.699812176512887e-06, "loss": 2.3807, "step": 198},
    {"epoch": 0.14462209302325582, "grad_norm": 1.1679128487047015, "learning_rate": 9.695756168424703e-06, "loss": 2.2929, "step": 199},
    {"epoch": 0.14534883720930233, "grad_norm": 3.228739152070278, "learning_rate": 9.691673801997974e-06, "loss": 2.3188, "step": 200},
    {"epoch": 0.14607558139534885, "grad_norm": 2.0854780633686856, "learning_rate": 9.68756510014794e-06, "loss": 2.3525, "step": 201},
    {"epoch": 0.14680232558139536, "grad_norm": 1.4863284919761375, "learning_rate": 9.683430085937672e-06, "loss": 2.3462, "step": 202},
    {"epoch": 0.14752906976744187, "grad_norm": 1.7980518257689877, "learning_rate": 9.67926878257794e-06, "loss": 2.3268, "step": 203},
    {"epoch": 0.14825581395348839, "grad_norm": 1.4394065482189453, "learning_rate": 9.675081213427076e-06, "loss": 2.3802, "step": 204},
    {"epoch": 0.14898255813953487, "grad_norm": 2.281476790918211, "learning_rate": 9.67086740199085e-06, "loss": 2.2903, "step": 205},
    {"epoch": 0.14970930232558138, "grad_norm": 2.1550730313406596, "learning_rate": 9.666627371922335e-06, "loss": 2.3204, "step": 206},
    {"epoch": 0.1504360465116279, "grad_norm": 1.1250028145242355, "learning_rate": 9.66236114702178e-06, "loss": 2.3396, "step": 207},
    {"epoch": 0.1511627906976744, "grad_norm": 1.547403727782741, "learning_rate": 9.658068751236464e-06, "loss": 2.3074, "step": 208},
    {"epoch": 0.15188953488372092, "grad_norm": 1.274380714094946, "learning_rate": 9.653750208660577e-06, "loss": 2.3541, "step": 209},
    {"epoch": 0.15261627906976744, "grad_norm": 2.134091901741687, "learning_rate": 9.649405543535067e-06, "loss": 2.3642, "step": 210},
    {"epoch": 0.15334302325581395, "grad_norm": 1.8269953720467489, "learning_rate": 9.645034780247521e-06, "loss": 2.2898, "step": 211},
    {"epoch": 0.15406976744186046, "grad_norm": 1.2440228463941307, "learning_rate": 9.640637943332025e-06, "loss": 2.3005, "step": 212},
    {"epoch": 0.15479651162790697, "grad_norm": 1.6575430787365315, "learning_rate": 9.636215057469009e-06, "loss": 2.3413, "step": 213},
    {"epoch": 0.1555232558139535, "grad_norm": 1.1243540368900853, "learning_rate": 9.631766147485131e-06, "loss": 2.3274, "step": 214},
    {"epoch": 0.15625, "grad_norm": 1.5183470450257768, "learning_rate": 9.627291238353127e-06, "loss": 2.3051, "step": 215},
    {"epoch": 0.1569767441860465, "grad_norm": 1.5119397631588836, "learning_rate": 9.622790355191672e-06, "loss": 2.3259, "step": 216},
    {"epoch": 0.15770348837209303, "grad_norm": 1.4922176869104902, "learning_rate": 9.618263523265238e-06, "loss": 2.3639, "step": 217},
    {"epoch": 0.15843023255813954, "grad_norm": 1.4730383961473745, "learning_rate": 9.613710767983953e-06, "loss": 2.3338, "step": 218},
    {"epoch": 0.15915697674418605, "grad_norm": 1.2439501029680762, "learning_rate": 9.609132114903458e-06, "loss": 2.3107, "step": 219},
    {"epoch": 0.15988372093023256, "grad_norm": 1.2143771364643452, "learning_rate": 9.60452758972477e-06, "loss": 2.3044, "step": 220},
    {"epoch": 0.16061046511627908, "grad_norm": 1.2334867243523404, "learning_rate": 9.599897218294122e-06, "loss": 2.2814, "step": 221},
    {"epoch": 0.1613372093023256, "grad_norm": 1.156597773993428, "learning_rate": 9.595241026602836e-06, "loss": 2.2877, "step": 222},
    {"epoch": 0.1620639534883721, "grad_norm": 1.2761713517213147, "learning_rate": 9.590559040787168e-06, "loss": 2.328, "step": 223},
    {"epoch": 0.16279069767441862, "grad_norm": 1.7761321590409704, "learning_rate": 9.585851287128157e-06, "loss": 2.3159, "step": 224},
    {"epoch": 0.16351744186046513, "grad_norm": 1.163745695967623, "learning_rate": 9.581117792051487e-06, "loss": 2.3469, "step": 225},
    {"epoch": 0.16424418604651161, "grad_norm": 1.133988790541161, "learning_rate": 9.576358582127334e-06, "loss": 2.3531, "step": 226},
    {"epoch": 0.16497093023255813, "grad_norm": 1.1024750427768464, "learning_rate": 9.57157368407022e-06, "loss": 2.3779, "step": 227},
    {"epoch": 0.16569767441860464, "grad_norm": 1.2517859254831207, "learning_rate": 9.56676312473885e-06, "loss": 2.2703, "step": 228},
    {"epoch": 0.16642441860465115, "grad_norm": 1.073885261058524, "learning_rate": 9.561926931135985e-06, "loss": 2.3131, "step": 229},
    {"epoch": 0.16715116279069767, "grad_norm": 1.1127469264086771, "learning_rate": 9.557065130408267e-06, "loss": 2.3835, "step": 230},
    {"epoch": 0.16787790697674418, "grad_norm": 1.1959973006506952, "learning_rate": 9.552177749846083e-06, "loss": 2.3338, "step": 231},
    {"epoch": 0.1686046511627907, "grad_norm": 1.1058947069586706, "learning_rate": 9.5472648168834e-06, "loss": 2.3255, "step": 232},
    {"epoch": 0.1693313953488372, "grad_norm": 1.116754555488762, "learning_rate": 9.542326359097619e-06, "loss": 2.3494, "step": 233},
    {"epoch": 0.17005813953488372, "grad_norm": 1.0876854570886407, "learning_rate": 9.537362404209419e-06, "loss": 2.3791, "step": 234},
    {"epoch": 0.17078488372093023, "grad_norm": 1.2137372127091681, "learning_rate": 9.532372980082598e-06, "loss": 2.2718, "step": 235},
    {"epoch": 0.17151162790697674, "grad_norm": 1.129260401791111, "learning_rate": 9.527358114723917e-06, "loss": 2.3639, "step": 236},
    {"epoch": 0.17223837209302326, "grad_norm": 1.2340740847840952, "learning_rate": 9.522317836282949e-06, "loss": 2.3132, "step": 237},
    {"epoch": 0.17296511627906977, "grad_norm": 1.1219295749886464, "learning_rate": 9.517252173051912e-06, "loss": 2.3437, "step": 238},
    {"epoch": 0.17369186046511628, "grad_norm": 1.1822380144608287, "learning_rate": 9.512161153465518e-06, "loss": 2.3267, "step": 239},
    {"epoch": 0.1744186046511628, "grad_norm": 1.1180365462119664, "learning_rate": 9.507044806100806e-06, "loss": 2.2838, "step": 240},
    {"epoch": 0.1751453488372093, "grad_norm": 1.2103630698428618, "learning_rate": 9.501903159676993e-06, "loss": 2.3157, "step": 241},
    {"epoch": 0.17587209302325582, "grad_norm": 1.6042659029711763, "learning_rate": 9.496736243055293e-06, "loss": 2.302, "step": 242},
    {"epoch": 0.17659883720930233, "grad_norm": 1.142876951018072, "learning_rate": 9.491544085238778e-06, "loss": 2.3086, "step": 243},
    {"epoch": 0.17732558139534885, "grad_norm": 1.2940579158130927, "learning_rate": 9.486326715372201e-06, "loss": 2.2899, "step": 244},
    {"epoch": 0.17805232558139536, "grad_norm": 1.2892835086318652, "learning_rate": 9.481084162741835e-06, "loss": 2.3795, "step": 245},
    {"epoch": 0.17877906976744187, "grad_norm": 1.0727384912534383, "learning_rate": 9.475816456775313e-06, "loss": 2.3633, "step": 246},
    {"epoch": 0.17950581395348839, "grad_norm": 1.1365478569616185, "learning_rate": 9.470523627041452e-06, "loss": 2.3659, "step": 247},
    {"epoch": 0.18023255813953487, "grad_norm": 1.1291795803936022, "learning_rate": 9.465205703250105e-06, "loss": 2.325, "step": 248},
    {"epoch": 0.18095930232558138, "grad_norm": 1.2835767823611957, "learning_rate": 9.459862715251973e-06, "loss": 2.2822, "step": 249},
    {"epoch": 0.1816860465116279, "grad_norm": 1.2029135874825383, "learning_rate": 9.454494693038455e-06, "loss": 2.3935, "step": 250},
    {"epoch": 0.1824127906976744, "grad_norm": 1.134998330696039, "learning_rate": 9.44910166674147e-06, "loss": 2.3326, "step": 251},
    {"epoch": 0.18313953488372092, "grad_norm": 1.2024738851493837, "learning_rate": 9.44368366663329e-06, "loss": 2.3289, "step": 252},
    {"epoch": 0.18386627906976744, "grad_norm": 1.204843905841385, "learning_rate": 9.438240723126376e-06, "loss": 2.3165, "step": 253},
    {"epoch": 0.18459302325581395, "grad_norm": 1.364897858166092, "learning_rate": 9.43277286677319e-06, "loss": 2.3432, "step": 254},
    {"epoch": 0.18531976744186046, "grad_norm": 1.1892133781450915, "learning_rate": 9.427280128266049e-06, "loss": 2.329, "step": 255},
    {"epoch": 0.18604651162790697, "grad_norm": 1.2959664242769635, "learning_rate": 9.421762538436933e-06, "loss": 2.3469, "step": 256},
    {"epoch": 0.1867732558139535, "grad_norm": 1.4188951604829714, "learning_rate": 9.416220128257317e-06, "loss": 2.3602, "step": 257},
    {"epoch": 0.1875, "grad_norm": 1.15071972840638, "learning_rate": 9.410652928837998e-06, "loss": 2.3189, "step": 258},
    {"epoch": 0.1882267441860465, "grad_norm": 1.1635890361824703, "learning_rate": 9.405060971428924e-06, "loss": 2.3282, "step": 259},
    {"epoch": 0.18895348837209303, "grad_norm": 1.173051490771907, "learning_rate": 9.399444287419012e-06, "loss": 2.3391, "step": 260},
    {"epoch": 0.18968023255813954, "grad_norm": 1.4480462335454614, "learning_rate": 9.393802908335978e-06, "loss": 2.3273, "step": 261},
    {"epoch": 0.19040697674418605, "grad_norm": 1.3645358660521583, "learning_rate": 9.388136865846153e-06, "loss": 2.3126, "step": 262},
    {"epoch": 0.19113372093023256, "grad_norm": 1.3259099589635823, "learning_rate": 9.382446191754313e-06, "loss": 2.3002, "step": 263},
    {"epoch": 0.19186046511627908, "grad_norm": 1.2243958605804686, "learning_rate": 9.376730918003495e-06, "loss": 2.2832, "step": 264},
    {"epoch": 0.1925872093023256, "grad_norm": 1.3154278911242208, "learning_rate": 9.370991076674821e-06, "loss": 2.3551, "step": 265},
    {"epoch": 0.1933139534883721, "grad_norm": 1.1106953939990531, "learning_rate": 9.36522669998731e-06, "loss": 2.2829, "step": 266},
    {"epoch": 0.19404069767441862, "grad_norm": 1.1959726898905372, "learning_rate": 9.359437820297716e-06, "loss": 2.3021, "step": 267},
    {"epoch": 0.19476744186046513, "grad_norm": 1.227387261796965, "learning_rate": 9.353624470100321e-06, "loss": 2.3884, "step": 268},
    {"epoch": 0.19549418604651161, "grad_norm": 1.4504794371079932, "learning_rate": 9.347786682026774e-06, "loss": 2.342, "step": 269},
    {"epoch": 0.19622093023255813, "grad_norm": 1.1512546090637321, "learning_rate": 9.341924488845892e-06, "loss": 2.3033, "step": 270},
    {"epoch": 0.19694767441860464, "grad_norm": 1.4217379552780167, "learning_rate": 9.336037923463494e-06, "loss": 2.3293, "step": 271},
    {"epoch": 0.19767441860465115, "grad_norm": 1.3571520758260824, "learning_rate": 9.330127018922195e-06, "loss": 2.2994, "step": 272},
    {"epoch": 0.19840116279069767, "grad_norm": 1.4540212433725141, "learning_rate": 9.324191808401235e-06, "loss": 2.2782, "step": 273},
    {"epoch": 0.19912790697674418, "grad_norm": 1.4077888491936041, "learning_rate": 9.31823232521629e-06, "loss": 2.4051, "step": 274},
    {"epoch": 0.1998546511627907, "grad_norm": 1.2165690102619144, "learning_rate": 9.312248602819284e-06, "loss": 2.3455, "step": 275},
    {"epoch": 0.2005813953488372, "grad_norm": 1.2756272884495146, "learning_rate": 9.306240674798203e-06, "loss": 2.3318, "step": 276},
    {"epoch": 0.20130813953488372, "grad_norm": 1.56568088675301, "learning_rate": 9.300208574876897e-06, "loss": 2.3476, "step": 277},
    {"epoch": 0.20203488372093023, "grad_norm": 1.2769102850208098, "learning_rate": 9.294152336914907e-06, "loss": 2.3364, "step": 278},
    {"epoch": 0.20276162790697674, "grad_norm": 1.1982423239697224, "learning_rate": 9.288071994907262e-06, "loss": 2.3306, "step": 279},
    {"epoch": 0.20348837209302326, "grad_norm": 1.1679661222509305, "learning_rate": 9.281967582984292e-06, "loss": 2.3281, "step": 280},
    {"epoch": 0.20421511627906977, "grad_norm": 1.4369131979668328, "learning_rate": 9.275839135411439e-06, "loss": 2.362, "step": 281},
    {"epoch": 0.20494186046511628, "grad_norm": 1.1740960575913133, "learning_rate": 9.269686686589063e-06, "loss": 2.3215, "step": 282},
    {"epoch": 0.2056686046511628, "grad_norm": 1.2289991464520638, "learning_rate": 9.263510271052243e-06, "loss": 2.3116, "step": 283},
    {"epoch": 0.2063953488372093, "grad_norm": 1.1411155539417916, "learning_rate": 9.257309923470596e-06, "loss": 2.3149, "step": 284},
    {"epoch": 0.20712209302325582, "grad_norm": 1.3599826720526693, "learning_rate": 9.251085678648072e-06, "loss": 2.329, "step": 285},
    {"epoch": 0.20784883720930233, "grad_norm": 1.1828225896311113, "learning_rate": 9.244837571522758e-06, "loss": 2.3205, "step": 286},
    {"epoch": 0.20857558139534885, "grad_norm": 1.3890914247072965, "learning_rate": 9.238565637166692e-06, "loss": 2.3309, "step": 287},
    {"epoch": 0.20930232558139536, "grad_norm": 1.123989679885759, "learning_rate": 9.232269910785651e-06, "loss": 2.3634, "step": 288},
    {"epoch": 0.21002906976744187, "grad_norm": 1.3138347474424352, "learning_rate": 9.225950427718974e-06, "loss": 2.3247, "step": 289},
    {"epoch": 0.21075581395348839, "grad_norm": 1.2441549630025073, "learning_rate": 9.219607223439343e-06, "loss": 2.2984, "step": 290},
    {"epoch": 0.21148255813953487, "grad_norm": 1.3393817124230734, "learning_rate": 9.213240333552589e-06, "loss": 2.3352, "step": 291},
    {"epoch": 0.21220930232558138, "grad_norm": 1.1731832742101422, "learning_rate": 9.206849793797508e-06, "loss": 2.3193, "step": 292},
    {"epoch": 0.2129360465116279, "grad_norm": 1.250178641486035, "learning_rate": 9.200435640045637e-06, "loss": 2.3277, "step": 293},
    {"epoch": 0.2136627906976744, "grad_norm": 2.0249942406767008, "learning_rate": 9.193997908301069e-06, "loss": 2.3586, "step": 294},
    {"epoch": 0.21438953488372092, "grad_norm": 1.1502433960463287, "learning_rate": 9.187536634700244e-06, "loss": 2.3408, "step": 295},
    {"epoch": 0.21511627906976744, "grad_norm": 1.5875909129160295, "learning_rate": 9.181051855511749e-06, "loss": 2.3472, "step": 296},
    {"epoch": 0.21584302325581395, "grad_norm": 1.134211330712058, "learning_rate": 9.174543607136111e-06, "loss": 2.3133, "step": 297},
    {"epoch": 0.21656976744186046, "grad_norm": 1.103871415303576, "learning_rate": 9.168011926105598e-06, "loss": 2.32, "step": 298},
    {"epoch": 0.21729651162790697, "grad_norm": 1.143028922746832, "learning_rate": 9.161456849084007e-06, "loss": 2.3567, "step": 299},
    {"epoch": 0.2180232558139535, "grad_norm": 1.1202287953390837, "learning_rate": 9.154878412866465e-06, "loss": 2.319, "step": 300},
    {"epoch": 0.21875, "grad_norm": 1.202255264308543, "learning_rate": 9.14827665437922e-06, "loss": 2.3689, "step": 301},
    {"epoch": 0.2194767441860465, "grad_norm": 1.1530745059451764, "learning_rate": 9.141651610679427e-06, "loss": 2.3254, "step": 302},
    {"epoch": 0.22020348837209303, "grad_norm": 1.0561589105093774, "learning_rate": 9.135003318954954e-06, "loss": 2.3005, "step": 303},
    {"epoch": 0.22093023255813954, "grad_norm": 1.3437348779767586, "learning_rate": 9.12833181652416e-06, "loss": 2.2729, "step": 304},
    {"epoch": 0.22165697674418605, "grad_norm": 1.2231858657294308, "learning_rate": 9.121637140835696e-06, "loss": 2.3805, "step": 305},
    {"epoch": 0.22238372093023256, "grad_norm": 1.1186730722598155, "learning_rate": 9.114919329468283e-06, "loss": 2.3557, "step": 306},
    {"epoch": 0.22311046511627908, "grad_norm": 1.2141698238590986, "learning_rate": 9.108178420130514e-06, "loss": 2.3878, "step": 307},
    {"epoch": 0.2238372093023256, "grad_norm": 1.265580530868728, "learning_rate": 9.101414450660633e-06, "loss": 2.363, "step": 308},
    {"epoch": 0.2245639534883721, "grad_norm": 1.1350587482142922, "learning_rate": 9.094627459026326e-06, "loss": 2.3463, "step": 309},
    {"epoch": 0.22529069767441862, "grad_norm": 1.2899696901740874, "learning_rate": 9.087817483324507e-06, "loss": 2.3027, "step": 310},
    {"epoch": 0.22601744186046513, "grad_norm": 1.2739682151115492, "learning_rate": 9.08098456178111e-06, "loss": 2.2623, "step": 311},
    {"epoch": 0.22674418604651161, "grad_norm": 1.1956689065395163, "learning_rate": 9.074128732750859e-06, "loss": 2.3667, "step": 312},
    {"epoch": 0.22747093023255813, "grad_norm": 1.3494480852964184, "learning_rate": 9.067250034717072e-06, "loss": 2.3396, "step": 313},
    {"epoch": 0.22819767441860464, "grad_norm": 1.2344725135239982, "learning_rate": 9.060348506291432e-06, "loss": 2.3715, "step": 314},
    {"epoch": 0.22892441860465115, "grad_norm": 1.3058573727875196, "learning_rate": 9.053424186213776e-06, "loss": 2.3339, "step": 315},
    {"epoch": 0.22965116279069767, "grad_norm": 1.2079684988702277, "learning_rate": 9.046477113351871e-06, "loss": 2.3097, "step": 316},
    {"epoch": 0.23037790697674418, "grad_norm": 1.5686728723740095, "learning_rate": 9.039507326701207e-06, "loss": 2.3182, "step": 317},
    {"epoch": 0.2311046511627907, "grad_norm": 1.132457656225435, "learning_rate": 9.032514865384767e-06, "loss": 2.3174, "step": 318},
    {"epoch": 0.2318313953488372, "grad_norm": 1.1861921100803974, "learning_rate": 9.025499768652817e-06, "loss": 2.2587, "step": 319},
    {"epoch": 0.23255813953488372, "grad_norm": 1.1734018256359529, "learning_rate": 9.018462075882673e-06, "loss": 2.3287, "step": 320},
    {"epoch": 0.23328488372093023, "grad_norm": 1.1680024156022957, "learning_rate": 9.011401826578492e-06, "loss": 2.3259, "step": 321},
    {"epoch": 0.23401162790697674, "grad_norm": 1.1241565327725624, "learning_rate": 9.00431906037105e-06, "loss": 2.3353, "step": 322},
    {"epoch": 0.23473837209302326, "grad_norm": 1.1633195222967603, "learning_rate": 8.997213817017508e-06, "loss": 2.3811, "step": 323},
    {"epoch": 0.23546511627906977, "grad_norm": 1.32393355925031, "learning_rate": 8.990086136401199e-06, "loss": 2.2933, "step": 324},
    {"epoch": 0.23619186046511628, "grad_norm": 1.0782668479787727, "learning_rate": 8.982936058531403e-06, "loss": 2.2711, "step": 325},
    {"epoch": 0.2369186046511628, "grad_norm": 1.1680539247789243, "learning_rate": 8.975763623543121e-06, "loss": 2.2925, "step": 326},
    {"epoch": 0.2376453488372093, "grad_norm": 1.2209650870203186, "learning_rate": 8.968568871696847e-06, "loss": 2.3007, "step": 327},
    {"epoch": 0.23837209302325582, "grad_norm": 1.193452920111548, "learning_rate": 8.961351843378349e-06, "loss": 2.2965, "step": 328},
    {"epoch": 0.23909883720930233, "grad_norm": 1.2288630443793624, "learning_rate": 8.95411257909843e-06, "loss": 2.3947, "step": 329},
    {"epoch": 0.23982558139534885, "grad_norm": 1.1891549047471393, "learning_rate": 8.946851119492717e-06, "loss": 2.3313, "step": 330},
    {"epoch": 0.24055232558139536, "grad_norm": 1.3399860656299631, "learning_rate": 8.939567505321418e-06, "loss": 2.3004, "step": 331},
    {"epoch": 0.24127906976744187, "grad_norm": 1.1419693810218348, "learning_rate": 8.932261777469105e-06, "loss": 2.3239, "step": 332},
    {"epoch": 0.24200581395348839, "grad_norm": 1.476156520390232, "learning_rate": 8.924933976944474e-06, "loss": 2.2866, "step": 333},
    {"epoch": 0.24273255813953487, "grad_norm": 1.1530937805371297, "learning_rate": 8.917584144880124e-06, "loss": 2.3271, "step": 334},
    {"epoch": 0.24345930232558138, "grad_norm": 1.1115334863414545, "learning_rate": 8.910212322532317e-06, "loss": 2.3109, "step": 335},
    {"epoch": 0.2441860465116279, "grad_norm": 1.2194859856345706, "learning_rate": 8.902818551280758e-06, "loss": 2.3286, "step": 336},
    {"epoch": 0.2449127906976744, "grad_norm": 1.1114840199496325, "learning_rate": 8.895402872628352e-06, "loss": 2.3368, "step": 337},
    {"epoch": 0.24563953488372092, "grad_norm": 1.2770635204204543, "learning_rate": 8.887965328200975e-06, "loss": 2.3251, "step": 338},
    {"epoch": 0.24636627906976744, "grad_norm": 3.188412638877289, "learning_rate": 8.880505959747245e-06, "loss": 2.3463, "step": 339},
    {"epoch": 0.24709302325581395, "grad_norm": 5.785905050744672, "learning_rate": 8.873024809138272e-06, "loss": 2.3595, "step": 340},
    {"epoch": 0.24781976744186046, "grad_norm": 1.5583689080056162, "learning_rate": 8.86552191836745e-06, "loss": 2.3503, "step": 341},
    {"epoch": 0.24854651162790697, "grad_norm": 1.9351660592413844, "learning_rate": 8.857997329550195e-06, "loss": 2.3714, "step": 342},
    {"epoch": 0.2492732558139535, "grad_norm": 1.3609293978964716, "learning_rate": 8.850451084923717e-06, "loss": 2.2474, "step": 343},
    {"epoch": 0.25, "grad_norm": 1.203918915159538, "learning_rate": 8.842883226846792e-06, "loss": 2.3455, "step": 344},
    {"epoch": 0.2507267441860465, "grad_norm": 1.230555039754799, "learning_rate": 8.835293797799517e-06, "loss": 2.2984, "step": 345},
    {"epoch": 0.251453488372093, "grad_norm": 1.2486863122908132, "learning_rate": 8.827682840383065e-06, "loss": 2.271, "step": 346},
    {"epoch": 0.25218023255813954, "grad_norm": 1.5306570439449183, "learning_rate": 8.82005039731946e-06, "loss": 2.3261, "step": 347},
    {"epoch": 0.25290697674418605, "grad_norm": 1.245699604524567, "learning_rate": 8.812396511451324e-06, "loss": 2.3407, "step": 348},
    {"epoch": 0.25363372093023256, "grad_norm": 1.3300197575111206, "learning_rate": 8.804721225741646e-06, "loss": 2.3137, "step": 349},
    {"epoch": 0.2543604651162791, "grad_norm": 1.3585363752309658, "learning_rate": 8.797024583273536e-06, "loss": 2.3609, "step": 350},
    {"epoch": 0.2550872093023256, "grad_norm": 1.33120843382374, "learning_rate": 8.789306627249985e-06, "loss": 2.3286, "step": 351},
    {"epoch": 0.2558139534883721, "grad_norm": 1.2345078288815257, "learning_rate": 8.781567400993617e-06, "loss": 2.2758, "step": 352},
    {"epoch": 0.2565406976744186, "grad_norm": 1.175709121820161, "learning_rate": 8.77380694794646e-06, "loss": 2.3674, "step": 353},
    {"epoch": 0.25726744186046513, "grad_norm": 1.1079781294472077, "learning_rate": 8.766025311669685e-06, "loss": 2.3371, "step": 354},
    {"epoch": 0.25799418604651164, "grad_norm": 1.17685761578481, "learning_rate": 8.75822253584337e-06, "loss": 2.3144, "step": 355},
    {"epoch": 0.25872093023255816, "grad_norm": 1.1866727563492487, "learning_rate": 8.75039866426626e-06, "loss": 2.2811, "step": 356},
    {"epoch": 0.25944767441860467, "grad_norm": 1.0868526193912587, "learning_rate": 8.742553740855507e-06, "loss": 2.309, "step": 357},
    {"epoch": 0.2601744186046512, "grad_norm": 1.2915335811110245, "learning_rate": 8.734687809646437e-06, "loss": 2.3078, "step": 358},
    {"epoch": 0.2609011627906977, "grad_norm": 1.1957644387801363, "learning_rate": 8.726800914792296e-06, "loss": 2.3283, "step": 359},
    {"epoch": 0.2616279069767442, "grad_norm": 1.1600839842194066, "learning_rate": 8.718893100564002e-06, "loss": 2.3648, "step": 360},
    {"epoch": 0.2623546511627907, "grad_norm": 1.1209572775215828, "learning_rate": 8.710964411349902e-06, "loss": 2.3809, "step": 361},
    {"epoch": 0.26308139534883723, "grad_norm": 1.1794327292754048, "learning_rate": 8.703014891655518e-06, "loss": 2.3331, "step": 362},
    {"epoch": 0.26380813953488375, "grad_norm": 1.0931283110899968, "learning_rate": 8.695044586103297e-06, "loss": 2.3286, "step": 363},
    {"epoch": 0.26453488372093026, "grad_norm": 1.1015327533697825, "learning_rate": 8.687053539432358e-06, "loss": 2.3201, "step": 364},
    {"epoch": 0.26526162790697677, "grad_norm": 1.1199381476087624, "learning_rate": 8.679041796498253e-06, "loss": 2.3103, "step": 365},
    {"epoch": 0.26598837209302323, "grad_norm": 1.271016400767226, "learning_rate": 8.6710094022727e-06, "loss": 2.3066, "step": 366},
    {"epoch": 0.26671511627906974, "grad_norm": 1.2726894817812915, "learning_rate": 8.66295640184334e-06, "loss": 2.3731, "step": 367},
    {"epoch": 0.26744186046511625, "grad_norm": 1.2171370056189053, "learning_rate": 8.65488284041348e-06, "loss": 2.3446, "step": 368},
    {"epoch": 0.26816860465116277, "grad_norm": 1.1315553912514624, "learning_rate": 8.646788763301842e-06, "loss": 2.3218, "step": 369},
    {"epoch": 0.2688953488372093, "grad_norm": 1.0962417262456845, "learning_rate": 8.638674215942307e-06, "loss": 2.2953, "step": 370},
    {"epoch": 0.2696220930232558, "grad_norm": 1.2587797644690477, "learning_rate": 8.630539243883659e-06, "loss": 2.3589,
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.2703488372093023, |
|
"grad_norm": 1.1337580563036955, |
|
"learning_rate": 8.62238389278933e-06, |
|
"loss": 2.3077, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.2710755813953488, |
|
"grad_norm": 1.4161156234882568, |
|
"learning_rate": 8.61420820843715e-06, |
|
"loss": 2.2961, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.27180232558139533, |
|
"grad_norm": 1.0912056466351248, |
|
"learning_rate": 8.606012236719073e-06, |
|
"loss": 2.309, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.27252906976744184, |
|
"grad_norm": 1.2422820682329003, |
|
"learning_rate": 8.59779602364094e-06, |
|
"loss": 2.3833, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.27325581395348836, |
|
"grad_norm": 1.1217055100092126, |
|
"learning_rate": 8.58955961532221e-06, |
|
"loss": 2.3043, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.27398255813953487, |
|
"grad_norm": 1.2187038590832011, |
|
"learning_rate": 8.581303057995697e-06, |
|
"loss": 2.2918, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.2747093023255814, |
|
"grad_norm": 1.3062238711621994, |
|
"learning_rate": 8.573026398007323e-06, |
|
"loss": 2.3038, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.2754360465116279, |
|
"grad_norm": 1.2074262903566075, |
|
"learning_rate": 8.564729681815846e-06, |
|
"loss": 2.2956, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.2761627906976744, |
|
"grad_norm": 1.225859930553607, |
|
"learning_rate": 8.556412955992604e-06, |
|
"loss": 2.3516, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2768895348837209, |
|
"grad_norm": 1.0827364010463423, |
|
"learning_rate": 8.548076267221258e-06, |
|
"loss": 2.2869, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.27761627906976744, |
|
"grad_norm": 1.0860309795926768, |
|
"learning_rate": 8.539719662297519e-06, |
|
"loss": 2.3179, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.27834302325581395, |
|
"grad_norm": 1.2009020460108055, |
|
"learning_rate": 8.531343188128896e-06, |
|
"loss": 2.3978, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.27906976744186046, |
|
"grad_norm": 1.0723553238260994, |
|
"learning_rate": 8.52294689173443e-06, |
|
"loss": 2.3311, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.279796511627907, |
|
"grad_norm": 1.2308339588445456, |
|
"learning_rate": 8.514530820244427e-06, |
|
"loss": 2.3072, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.2805232558139535, |
|
"grad_norm": 1.443673625883687, |
|
"learning_rate": 8.506095020900192e-06, |
|
"loss": 2.3471, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.28125, |
|
"grad_norm": 1.2323755395885254, |
|
"learning_rate": 8.497639541053769e-06, |
|
"loss": 2.3082, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2819767441860465, |
|
"grad_norm": 1.0985843059579208, |
|
"learning_rate": 8.489164428167677e-06, |
|
"loss": 2.3291, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.282703488372093, |
|
"grad_norm": 1.2185119408676808, |
|
"learning_rate": 8.480669729814635e-06, |
|
"loss": 2.2915, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.28343023255813954, |
|
"grad_norm": 1.2547102455831876, |
|
"learning_rate": 8.472155493677299e-06, |
|
"loss": 2.3284, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.28415697674418605, |
|
"grad_norm": 1.241863166940908, |
|
"learning_rate": 8.463621767547998e-06, |
|
"loss": 2.2481, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.28488372093023256, |
|
"grad_norm": 1.0588850602261963, |
|
"learning_rate": 8.455068599328462e-06, |
|
"loss": 2.3331, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.2856104651162791, |
|
"grad_norm": 1.1212857689423208, |
|
"learning_rate": 8.446496037029555e-06, |
|
"loss": 2.2994, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.2863372093023256, |
|
"grad_norm": 1.126135345225652, |
|
"learning_rate": 8.437904128770999e-06, |
|
"loss": 2.3206, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.2870639534883721, |
|
"grad_norm": 1.298548137211412, |
|
"learning_rate": 8.429292922781115e-06, |
|
"loss": 2.2919, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.2877906976744186, |
|
"grad_norm": 1.1255687465878497, |
|
"learning_rate": 8.420662467396548e-06, |
|
"loss": 2.3118, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.28851744186046513, |
|
"grad_norm": 1.1625027040610958, |
|
"learning_rate": 8.412012811061985e-06, |
|
"loss": 2.3196, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.28924418604651164, |
|
"grad_norm": 1.178266615842495, |
|
"learning_rate": 8.403344002329901e-06, |
|
"loss": 2.2574, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.28997093023255816, |
|
"grad_norm": 1.6209181315673342, |
|
"learning_rate": 8.394656089860274e-06, |
|
"loss": 2.3217, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.29069767441860467, |
|
"grad_norm": 1.4055391277869425, |
|
"learning_rate": 8.385949122420318e-06, |
|
"loss": 2.3053, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2914244186046512, |
|
"grad_norm": 1.0837894019376972, |
|
"learning_rate": 8.377223148884202e-06, |
|
"loss": 2.3235, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.2921511627906977, |
|
"grad_norm": 1.1567143322087055, |
|
"learning_rate": 8.368478218232787e-06, |
|
"loss": 2.2682, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.2928779069767442, |
|
"grad_norm": 1.2833964025525515, |
|
"learning_rate": 8.359714379553338e-06, |
|
"loss": 2.3368, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.2936046511627907, |
|
"grad_norm": 1.1703928335892075, |
|
"learning_rate": 8.350931682039262e-06, |
|
"loss": 2.3692, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.29433139534883723, |
|
"grad_norm": 1.2278640949453006, |
|
"learning_rate": 8.342130174989819e-06, |
|
"loss": 2.3298, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.29505813953488375, |
|
"grad_norm": 1.2391820171446353, |
|
"learning_rate": 8.333309907809852e-06, |
|
"loss": 2.3221, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.29578488372093026, |
|
"grad_norm": 1.1591560003156445, |
|
"learning_rate": 8.324470930009514e-06, |
|
"loss": 2.2989, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.29651162790697677, |
|
"grad_norm": 1.069079000918171, |
|
"learning_rate": 8.315613291203977e-06, |
|
"loss": 2.3257, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.29723837209302323, |
|
"grad_norm": 1.0658003352432701, |
|
"learning_rate": 8.306737041113169e-06, |
|
"loss": 2.2969, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.29796511627906974, |
|
"grad_norm": 1.2018393379444972, |
|
"learning_rate": 8.29784222956148e-06, |
|
"loss": 2.2994, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.29869186046511625, |
|
"grad_norm": 1.0743593652723726, |
|
"learning_rate": 8.288928906477497e-06, |
|
"loss": 2.3918, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.29941860465116277, |
|
"grad_norm": 1.1791792326436397, |
|
"learning_rate": 8.279997121893713e-06, |
|
"loss": 2.2971, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.3001453488372093, |
|
"grad_norm": 1.068770185795778, |
|
"learning_rate": 8.271046925946247e-06, |
|
"loss": 2.3359, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.3008720930232558, |
|
"grad_norm": 1.3026079186055184, |
|
"learning_rate": 8.262078368874566e-06, |
|
"loss": 2.3961, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.3015988372093023, |
|
"grad_norm": 1.0548359339086584, |
|
"learning_rate": 8.25309150102121e-06, |
|
"loss": 2.2817, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3023255813953488, |
|
"grad_norm": 1.2757882978370192, |
|
"learning_rate": 8.244086372831492e-06, |
|
"loss": 2.2907, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.30305232558139533, |
|
"grad_norm": 1.1608993852105884, |
|
"learning_rate": 8.235063034853228e-06, |
|
"loss": 2.3469, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.30377906976744184, |
|
"grad_norm": 1.6704712398247437, |
|
"learning_rate": 8.226021537736449e-06, |
|
"loss": 2.2899, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.30450581395348836, |
|
"grad_norm": 1.1076846726964071, |
|
"learning_rate": 8.216961932233118e-06, |
|
"loss": 2.2449, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.30523255813953487, |
|
"grad_norm": 1.2655876969977151, |
|
"learning_rate": 8.207884269196845e-06, |
|
"loss": 2.2987, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3059593023255814, |
|
"grad_norm": 1.1019813151108213, |
|
"learning_rate": 8.198788599582596e-06, |
|
"loss": 2.3452, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.3066860465116279, |
|
"grad_norm": 1.781405867357273, |
|
"learning_rate": 8.189674974446423e-06, |
|
"loss": 2.308, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3074127906976744, |
|
"grad_norm": 1.2350247568154873, |
|
"learning_rate": 8.180543444945154e-06, |
|
"loss": 2.3308, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.3081395348837209, |
|
"grad_norm": 1.9649756348954517, |
|
"learning_rate": 8.171394062336127e-06, |
|
"loss": 2.3159, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.30886627906976744, |
|
"grad_norm": 1.2739192151431729, |
|
"learning_rate": 8.162226877976886e-06, |
|
"loss": 2.2989, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.30959302325581395, |
|
"grad_norm": 1.129631923987475, |
|
"learning_rate": 8.153041943324912e-06, |
|
"loss": 2.3198, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.31031976744186046, |
|
"grad_norm": 1.3255063702568364, |
|
"learning_rate": 8.143839309937307e-06, |
|
"loss": 2.3381, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.311046511627907, |
|
"grad_norm": 1.1628422209438, |
|
"learning_rate": 8.134619029470535e-06, |
|
"loss": 2.3287, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.3117732558139535, |
|
"grad_norm": 1.1887971582610084, |
|
"learning_rate": 8.125381153680103e-06, |
|
"loss": 2.3326, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 1.1714666094613004, |
|
"learning_rate": 8.116125734420297e-06, |
|
"loss": 2.3104, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3132267441860465, |
|
"grad_norm": 1.1365583272218085, |
|
"learning_rate": 8.10685282364387e-06, |
|
"loss": 2.284, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.313953488372093, |
|
"grad_norm": 1.3559133401669676, |
|
"learning_rate": 8.097562473401764e-06, |
|
"loss": 2.3029, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.31468023255813954, |
|
"grad_norm": 1.4060183717002648, |
|
"learning_rate": 8.088254735842808e-06, |
|
"loss": 2.2876, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.31540697674418605, |
|
"grad_norm": 1.285917870149192, |
|
"learning_rate": 8.078929663213432e-06, |
|
"loss": 2.3057, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.31613372093023256, |
|
"grad_norm": 1.1249535937437272, |
|
"learning_rate": 8.069587307857377e-06, |
|
"loss": 2.3215, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3168604651162791, |
|
"grad_norm": 1.0900905006814698, |
|
"learning_rate": 8.060227722215385e-06, |
|
"loss": 2.2907, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3175872093023256, |
|
"grad_norm": 1.1568687109085039, |
|
"learning_rate": 8.050850958824926e-06, |
|
"loss": 2.3176, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3183139534883721, |
|
"grad_norm": 1.2382798087134244, |
|
"learning_rate": 8.041457070319884e-06, |
|
"loss": 2.3037, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.3190406976744186, |
|
"grad_norm": 1.1937622328697128, |
|
"learning_rate": 8.032046109430276e-06, |
|
"loss": 2.3076, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.31976744186046513, |
|
"grad_norm": 1.1177244010663805, |
|
"learning_rate": 8.02261812898195e-06, |
|
"loss": 2.3324, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.32049418604651164, |
|
"grad_norm": 1.127296551947173, |
|
"learning_rate": 8.013173181896283e-06, |
|
"loss": 2.3078, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.32122093023255816, |
|
"grad_norm": 1.1455250336877478, |
|
"learning_rate": 8.003711321189895e-06, |
|
"loss": 2.3523, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.32194767441860467, |
|
"grad_norm": 1.1011738742836867, |
|
"learning_rate": 7.994232599974346e-06, |
|
"loss": 2.3044, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.3226744186046512, |
|
"grad_norm": 1.1216213823441015, |
|
"learning_rate": 7.984737071455834e-06, |
|
"loss": 2.2963, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.3234011627906977, |
|
"grad_norm": 1.2200505156378094, |
|
"learning_rate": 7.975224788934903e-06, |
|
"loss": 2.3331, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3241279069767442, |
|
"grad_norm": 1.1574339311156163, |
|
"learning_rate": 7.965695805806141e-06, |
|
"loss": 2.2945, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.3248546511627907, |
|
"grad_norm": 1.1286977461580614, |
|
"learning_rate": 7.95615017555788e-06, |
|
"loss": 2.308, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.32558139534883723, |
|
"grad_norm": 1.0892889308004792, |
|
"learning_rate": 7.946587951771894e-06, |
|
"loss": 2.3267, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.32630813953488375, |
|
"grad_norm": 1.1576745403605158, |
|
"learning_rate": 7.937009188123102e-06, |
|
"loss": 2.3197, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.32703488372093026, |
|
"grad_norm": 1.4094383886534938, |
|
"learning_rate": 7.927413938379268e-06, |
|
"loss": 2.3073, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.32776162790697677, |
|
"grad_norm": 1.1812098015475305, |
|
"learning_rate": 7.917802256400688e-06, |
|
"loss": 2.3307, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.32848837209302323, |
|
"grad_norm": 1.0953213999810891, |
|
"learning_rate": 7.908174196139907e-06, |
|
"loss": 2.2721, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.32921511627906974, |
|
"grad_norm": 1.148401707752194, |
|
"learning_rate": 7.898529811641393e-06, |
|
"loss": 2.3272, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.32994186046511625, |
|
"grad_norm": 1.0408799055573608, |
|
"learning_rate": 7.888869157041257e-06, |
|
"loss": 2.3114, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.33066860465116277, |
|
"grad_norm": 1.1001301089410505, |
|
"learning_rate": 7.879192286566929e-06, |
|
"loss": 2.3111, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3313953488372093, |
|
"grad_norm": 1.2045234323369642, |
|
"learning_rate": 7.869499254536865e-06, |
|
"loss": 2.3341, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3321220930232558, |
|
"grad_norm": 1.1544365222104789, |
|
"learning_rate": 7.859790115360243e-06, |
|
"loss": 2.3371, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.3328488372093023, |
|
"grad_norm": 1.2552624406434518, |
|
"learning_rate": 7.850064923536649e-06, |
|
"loss": 2.3002, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.3335755813953488, |
|
"grad_norm": 1.293746161658906, |
|
"learning_rate": 7.84032373365578e-06, |
|
"loss": 2.3692, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.33430232558139533, |
|
"grad_norm": 1.333213925468466, |
|
"learning_rate": 7.83056660039713e-06, |
|
"loss": 2.2969, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33502906976744184, |
|
"grad_norm": 1.492618082058401, |
|
"learning_rate": 7.82079357852969e-06, |
|
"loss": 2.2577, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.33575581395348836, |
|
"grad_norm": 1.15249654866146, |
|
"learning_rate": 7.811004722911637e-06, |
|
"loss": 2.3238, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.33648255813953487, |
|
"grad_norm": 1.2530632952138137, |
|
"learning_rate": 7.801200088490026e-06, |
|
"loss": 2.301, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.3372093023255814, |
|
"grad_norm": 1.2126256906203419, |
|
"learning_rate": 7.791379730300476e-06, |
|
"loss": 2.252, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3379360465116279, |
|
"grad_norm": 1.1402545118499574, |
|
"learning_rate": 7.781543703466881e-06, |
|
"loss": 2.3447, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3386627906976744, |
|
"grad_norm": 1.079289917582004, |
|
"learning_rate": 7.771692063201072e-06, |
|
"loss": 2.307, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3393895348837209, |
|
"grad_norm": 1.0890060946616744, |
|
"learning_rate": 7.76182486480253e-06, |
|
"loss": 2.2897, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.34011627906976744, |
|
"grad_norm": 1.2191356307676027, |
|
"learning_rate": 7.751942163658066e-06, |
|
"loss": 2.2806, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.34084302325581395, |
|
"grad_norm": 1.1684818562278108, |
|
"learning_rate": 7.742044015241508e-06, |
|
"loss": 2.309, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.34156976744186046, |
|
"grad_norm": 1.1378897839340987, |
|
"learning_rate": 7.7321304751134e-06, |
|
"loss": 2.3417, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.342296511627907, |
|
"grad_norm": 1.0831185805529036, |
|
"learning_rate": 7.722201598920673e-06, |
|
"loss": 2.2842, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.3430232558139535, |
|
"grad_norm": 1.163651975848296, |
|
"learning_rate": 7.712257442396355e-06, |
|
"loss": 2.3188, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.34375, |
|
"grad_norm": 1.1173941040067659, |
|
"learning_rate": 7.702298061359236e-06, |
|
"loss": 2.29, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.3444767441860465, |
|
"grad_norm": 1.1553929055835899, |
|
"learning_rate": 7.692323511713568e-06, |
|
"loss": 2.3456, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.345203488372093, |
|
"grad_norm": 1.137786497705492, |
|
"learning_rate": 7.682333849448749e-06, |
|
"loss": 2.3446, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.34593023255813954, |
|
"grad_norm": 1.2480217073388282, |
|
"learning_rate": 7.672329130639007e-06, |
|
"loss": 2.3192, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.34665697674418605, |
|
"grad_norm": 1.1303930753175966, |
|
"learning_rate": 7.662309411443084e-06, |
|
"loss": 2.3791, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.34738372093023256, |
|
"grad_norm": 1.1988474595235474, |
|
"learning_rate": 7.652274748103924e-06, |
|
"loss": 2.3186, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.3481104651162791, |
|
"grad_norm": 1.1508981723474565, |
|
"learning_rate": 7.642225196948357e-06, |
|
"loss": 2.315, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.3488372093023256, |
|
"grad_norm": 1.138764144490926, |
|
"learning_rate": 7.63216081438678e-06, |
|
"loss": 2.2812, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3495639534883721, |
|
"grad_norm": 1.2032052124329502, |
|
"learning_rate": 7.622081656912842e-06, |
|
"loss": 2.3625, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.3502906976744186, |
|
"grad_norm": 1.3016605768369958, |
|
"learning_rate": 7.611987781103128e-06, |
|
"loss": 2.3206, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.35101744186046513, |
|
"grad_norm": 1.0688947854232214, |
|
"learning_rate": 7.601879243616838e-06, |
|
"loss": 2.3483, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.35174418604651164, |
|
"grad_norm": 1.0249007592193522, |
|
"learning_rate": 7.5917561011954755e-06, |
|
"loss": 2.3421, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.35247093023255816, |
|
"grad_norm": 1.1709699355757717, |
|
"learning_rate": 7.581618410662519e-06, |
|
"loss": 2.3722, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.35319767441860467, |
|
"grad_norm": 1.0586329236128114, |
|
"learning_rate": 7.571466228923115e-06, |
|
"loss": 2.3478, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.3539244186046512, |
|
"grad_norm": 1.0813414465551134, |
|
"learning_rate": 7.56129961296375e-06, |
|
"loss": 2.3209, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3546511627906977, |
|
"grad_norm": 1.0444974051946252, |
|
"learning_rate": 7.551118619851929e-06, |
|
"loss": 2.3114, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3553779069767442, |
|
"grad_norm": 1.5815223503399907, |
|
"learning_rate": 7.540923306735868e-06, |
|
"loss": 2.2959, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3561046511627907, |
|
"grad_norm": 1.1602847453567253, |
|
"learning_rate": 7.530713730844153e-06, |
|
"loss": 2.3397, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.35683139534883723, |
|
"grad_norm": 1.049120747351293, |
|
"learning_rate": 7.5204899494854415e-06, |
|
"loss": 2.2834, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.35755813953488375, |
|
"grad_norm": 1.2262144884196204, |
|
"learning_rate": 7.510252020048121e-06, |
|
"loss": 2.287, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.35828488372093026, |
|
"grad_norm": 1.2042548629565901, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 2.2654, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.35901162790697677, |
|
"grad_norm": 1.2530924104028403, |
|
"learning_rate": 7.489733946887982e-06, |
|
"loss": 2.3486, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.35973837209302323, |
|
"grad_norm": 1.2825193397772006, |
|
"learning_rate": 7.479453918337733e-06, |
|
"loss": 2.275, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.36046511627906974, |
|
"grad_norm": 1.3194033390036468, |
|
"learning_rate": 7.469159972053377e-06, |
|
"loss": 2.2908, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.36119186046511625, |
|
"grad_norm": 1.1152767805402615, |
|
"learning_rate": 7.458852165817153e-06, |
|
"loss": 2.3095, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.36191860465116277, |
|
"grad_norm": 1.1495601735419694, |
|
"learning_rate": 7.448530557489105e-06, |
|
"loss": 2.3252, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.3626453488372093, |
|
"grad_norm": 1.0736588885000224, |
|
"learning_rate": 7.438195205006749e-06, |
|
"loss": 2.2954, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.3633720930232558, |
|
"grad_norm": 1.207665825472445, |
|
"learning_rate": 7.427846166384747e-06, |
|
"loss": 2.2967, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3640988372093023, |
|
"grad_norm": 1.18745268887086, |
|
"learning_rate": 7.417483499714589e-06, |
|
"loss": 2.2981, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.3648255813953488, |
|
"grad_norm": 1.564274356915172, |
|
"learning_rate": 7.40710726316426e-06, |
|
"loss": 2.2957, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.36555232558139533, |
|
"grad_norm": 1.290273123363002, |
|
"learning_rate": 7.396717514977916e-06, |
|
"loss": 2.3357, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.36627906976744184, |
|
"grad_norm": 1.2047088792360614, |
|
"learning_rate": 7.386314313475557e-06, |
|
"loss": 2.3721, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.36700581395348836, |
|
"grad_norm": 1.3246114123808277, |
|
"learning_rate": 7.3758977170527e-06, |
|
"loss": 2.3209, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.36773255813953487, |
|
"grad_norm": 1.1370471368395154, |
|
"learning_rate": 7.365467784180051e-06, |
|
"loss": 2.2803, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.3684593023255814, |
|
"grad_norm": 1.0769050085427025, |
|
"learning_rate": 7.355024573403174e-06, |
|
"loss": 2.3312, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.3691860465116279, |
|
"grad_norm": 1.3157313090863894, |
|
"learning_rate": 7.3445681433421675e-06, |
|
"loss": 2.3267, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.3699127906976744, |
|
"grad_norm": 1.1464812479744277, |
|
"learning_rate": 7.3340985526913335e-06, |
|
"loss": 2.3112, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.3706395348837209, |
|
"grad_norm": 1.1613820873773826, |
|
"learning_rate": 7.323615860218844e-06, |
|
"loss": 2.3007, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.37136627906976744, |
|
"grad_norm": 1.2091702257927293, |
|
"learning_rate": 7.313120124766417e-06, |
|
"loss": 2.316, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.37209302325581395, |
|
"grad_norm": 1.3421544244903119, |
|
"learning_rate": 7.30261140524898e-06, |
|
"loss": 2.2708, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.37281976744186046, |
|
"grad_norm": 1.39687115623401, |
|
"learning_rate": 7.292089760654352e-06, |
|
"loss": 2.3492, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.373546511627907, |
|
"grad_norm": 2.8218882467926356, |
|
"learning_rate": 7.281555250042893e-06, |
|
"loss": 2.2847, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.3742732558139535, |
|
"grad_norm": 1.088850072307723, |
|
"learning_rate": 7.271007932547188e-06, |
|
"loss": 2.3115, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 1.120826175014608, |
|
"learning_rate": 7.2604478673717095e-06, |
|
"loss": 2.3288, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.3757267441860465, |
|
"grad_norm": 1.145722078384872, |
|
"learning_rate": 7.249875113792485e-06, |
|
"loss": 2.3426, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.376453488372093, |
|
"grad_norm": 1.1602292605455282, |
|
"learning_rate": 7.239289731156767e-06, |
|
"loss": 2.2989, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.37718023255813954, |
|
"grad_norm": 1.1915736998674706, |
|
"learning_rate": 7.2286917788826926e-06, |
|
"loss": 2.3374, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.37790697674418605, |
|
"grad_norm": 1.13844351172199, |
|
"learning_rate": 7.218081316458959e-06, |
|
"loss": 2.332, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.37863372093023256, |
|
"grad_norm": 1.0975887317687405, |
|
"learning_rate": 7.207458403444488e-06, |
|
"loss": 2.3168, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.3793604651162791, |
|
"grad_norm": 1.3495614353404823, |
|
"learning_rate": 7.196823099468084e-06, |
|
"loss": 2.2743, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.3800872093023256, |
|
"grad_norm": 1.1407396853075507, |
|
"learning_rate": 7.186175464228109e-06, |
|
"loss": 2.2748, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.3808139534883721, |
|
"grad_norm": 1.083025323169463, |
|
"learning_rate": 7.175515557492139e-06, |
|
"loss": 2.2926, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3815406976744186, |
|
"grad_norm": 1.2080116038697124, |
|
"learning_rate": 7.1648434390966356e-06, |
|
"loss": 2.3196, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.38226744186046513, |
|
"grad_norm": 2.066872793237152, |
|
"learning_rate": 7.154159168946607e-06, |
|
"loss": 2.2865, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.38299418604651164, |
|
"grad_norm": 1.2368945968737097, |
|
"learning_rate": 7.143462807015271e-06, |
|
"loss": 2.2789, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.38372093023255816, |
|
"grad_norm": 1.1743588332349175, |
|
"learning_rate": 7.132754413343721e-06, |
|
"loss": 2.3053, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.38444767441860467, |
|
"grad_norm": 1.4255712436197852, |
|
"learning_rate": 7.122034048040586e-06, |
|
"loss": 2.308, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.3851744186046512, |
|
"grad_norm": 1.1049951766621189, |
|
"learning_rate": 7.111301771281692e-06, |
|
"loss": 2.3054, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3859011627906977, |
|
"grad_norm": 1.1038689017404644, |
|
"learning_rate": 7.100557643309732e-06, |
|
"loss": 2.2797, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.3866279069767442, |
|
"grad_norm": 1.0536128185639355, |
|
"learning_rate": 7.089801724433918e-06, |
|
"loss": 2.3071, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.3873546511627907, |
|
"grad_norm": 1.1449743039342242, |
|
"learning_rate": 7.079034075029651e-06, |
|
"loss": 2.2961, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.38808139534883723, |
|
"grad_norm": 1.1655567692704296, |
|
"learning_rate": 7.0682547555381734e-06, |
|
"loss": 2.329, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.38880813953488375, |
|
"grad_norm": 1.162251468256884, |
|
"learning_rate": 7.057463826466235e-06, |
|
"loss": 2.2895, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.38953488372093026, |
|
"grad_norm": 1.0896289667493249, |
|
"learning_rate": 7.0466613483857615e-06, |
|
"loss": 2.3105, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.39026162790697677, |
|
"grad_norm": 1.2880741445087427, |
|
"learning_rate": 7.035847381933494e-06, |
|
"loss": 2.3064, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.39098837209302323, |
|
"grad_norm": 1.2105896256495732, |
|
"learning_rate": 7.025021987810664e-06, |
|
"loss": 2.3694, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.39171511627906974, |
|
"grad_norm": 1.3065723003897216, |
|
"learning_rate": 7.014185226782655e-06, |
|
"loss": 2.3639, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.39244186046511625, |
|
"grad_norm": 1.1070630816606901, |
|
"learning_rate": 7.003337159678649e-06, |
|
"loss": 2.3083, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.39316860465116277, |
|
"grad_norm": 1.307708843946163, |
|
"learning_rate": 6.992477847391292e-06, |
|
"loss": 2.3399, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.3938953488372093, |
|
"grad_norm": 1.122196521717046, |
|
"learning_rate": 6.981607350876357e-06, |
|
"loss": 2.3313, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.3946220930232558, |
|
"grad_norm": 1.2186143505802445, |
|
"learning_rate": 6.970725731152389e-06, |
|
"loss": 2.3405, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.3953488372093023, |
|
"grad_norm": 1.167077189138343, |
|
"learning_rate": 6.959833049300376e-06, |
|
"loss": 2.29, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.3960755813953488, |
|
"grad_norm": 1.2944725806931758, |
|
"learning_rate": 6.948929366463397e-06, |
|
"loss": 2.3098, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.39680232558139533, |
|
"grad_norm": 12.31808967596737, |
|
"learning_rate": 6.938014743846285e-06, |
|
"loss": 2.2809, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.39752906976744184, |
|
"grad_norm": 1.2870952481596472, |
|
"learning_rate": 6.927089242715277e-06, |
|
"loss": 2.3301, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.39825581395348836, |
|
"grad_norm": 1.1514862581452583, |
|
"learning_rate": 6.916152924397676e-06, |
|
"loss": 2.3583, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.39898255813953487, |
|
"grad_norm": 1.100288574688579, |
|
"learning_rate": 6.905205850281502e-06, |
|
"loss": 2.2923, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.3997093023255814, |
|
"grad_norm": 1.050534628812702, |
|
"learning_rate": 6.894248081815155e-06, |
|
"loss": 2.3055, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4004360465116279, |
|
"grad_norm": 1.3346961931360157, |
|
"learning_rate": 6.883279680507057e-06, |
|
"loss": 2.3039, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.4011627906976744, |
|
"grad_norm": 1.505626927488441, |
|
"learning_rate": 6.872300707925319e-06, |
|
"loss": 2.285, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4018895348837209, |
|
"grad_norm": 1.1299060186104746, |
|
"learning_rate": 6.861311225697392e-06, |
|
"loss": 2.2638, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.40261627906976744, |
|
"grad_norm": 1.1060082205232702, |
|
"learning_rate": 6.850311295509719e-06, |
|
"loss": 2.3511, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.40334302325581395, |
|
"grad_norm": 1.1200104346671893, |
|
"learning_rate": 6.8393009791073895e-06, |
|
"loss": 2.329, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.40406976744186046, |
|
"grad_norm": 1.1476307981667215, |
|
"learning_rate": 6.828280338293792e-06, |
|
"loss": 2.3368, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.404796511627907, |
|
"grad_norm": 1.028644332291077, |
|
"learning_rate": 6.817249434930267e-06, |
|
"loss": 2.3036, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.4055232558139535, |
|
"grad_norm": 1.1390289832994775, |
|
"learning_rate": 6.806208330935766e-06, |
|
"loss": 2.3334, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.40625, |
|
"grad_norm": 1.216277772235329, |
|
"learning_rate": 6.7951570882864944e-06, |
|
"loss": 2.2741, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.4069767441860465, |
|
"grad_norm": 1.344668900236945, |
|
"learning_rate": 6.784095769015573e-06, |
|
"loss": 2.3205, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.407703488372093, |
|
"grad_norm": 1.070186991237952, |
|
"learning_rate": 6.773024435212678e-06, |
|
"loss": 2.307, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.40843023255813954, |
|
"grad_norm": 1.209196957642039, |
|
"learning_rate": 6.761943149023706e-06, |
|
"loss": 2.3112, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.40915697674418605, |
|
"grad_norm": 1.1432958784507203, |
|
"learning_rate": 6.750851972650416e-06, |
|
"loss": 2.2938, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.40988372093023256, |
|
"grad_norm": 1.2247587749897832, |
|
"learning_rate": 6.739750968350081e-06, |
|
"loss": 2.2909, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.4106104651162791, |
|
"grad_norm": 1.0268929904759363, |
|
"learning_rate": 6.728640198435143e-06, |
|
"loss": 2.2189, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.4113372093023256, |
|
"grad_norm": 1.6530106180630812, |
|
"learning_rate": 6.717519725272859e-06, |
|
"loss": 2.3005, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.4120639534883721, |
|
"grad_norm": 1.2858847661272295, |
|
"learning_rate": 6.706389611284953e-06, |
|
"loss": 2.3367, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4127906976744186, |
|
"grad_norm": 1.0808913275520438, |
|
"learning_rate": 6.6952499189472665e-06, |
|
"loss": 2.2644, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.41351744186046513, |
|
"grad_norm": 1.0947135769666254, |
|
"learning_rate": 6.684100710789405e-06, |
|
"loss": 2.3109, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.41424418604651164, |
|
"grad_norm": 1.227905292543617, |
|
"learning_rate": 6.6729420493943875e-06, |
|
"loss": 2.3089, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41497093023255816, |
|
"grad_norm": 1.1311925494343438, |
|
"learning_rate": 6.6617739973982985e-06, |
|
"loss": 2.3306, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.41569767441860467, |
|
"grad_norm": 1.1845320813818851, |
|
"learning_rate": 6.6505966174899326e-06, |
|
"loss": 2.2807, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.4164244186046512, |
|
"grad_norm": 1.192027080183658, |
|
"learning_rate": 6.639409972410446e-06, |
|
"loss": 2.2992, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.4171511627906977, |
|
"grad_norm": 1.1134220611408692, |
|
"learning_rate": 6.628214124952999e-06, |
|
"loss": 2.3198, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.4178779069767442, |
|
"grad_norm": 1.2137521582577167, |
|
"learning_rate": 6.617009137962407e-06, |
|
"loss": 2.2729, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4186046511627907, |
|
"grad_norm": 1.171459631389888, |
|
"learning_rate": 6.605795074334793e-06, |
|
"loss": 2.2871, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.41933139534883723, |
|
"grad_norm": 1.1838961162503008, |
|
"learning_rate": 6.594571997017224e-06, |
|
"loss": 2.3488, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.42005813953488375, |
|
"grad_norm": 1.1791646717778388, |
|
"learning_rate": 6.583339969007364e-06, |
|
"loss": 2.3505, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.42078488372093026, |
|
"grad_norm": 1.1787043516107347, |
|
"learning_rate": 6.57209905335312e-06, |
|
"loss": 2.3469, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.42151162790697677, |
|
"grad_norm": 1.1000497157207225, |
|
"learning_rate": 6.560849313152287e-06, |
|
"loss": 2.2874, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.42223837209302323, |
|
"grad_norm": 1.145236146723751, |
|
"learning_rate": 6.549590811552193e-06, |
|
"loss": 2.2612, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.42296511627906974, |
|
"grad_norm": 1.2072587593279676, |
|
"learning_rate": 6.538323611749351e-06, |
|
"loss": 2.3195, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.42369186046511625, |
|
"grad_norm": 1.1421104121114019, |
|
"learning_rate": 6.5270477769890906e-06, |
|
"loss": 2.2864, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.42441860465116277, |
|
"grad_norm": 1.1733190168229655, |
|
"learning_rate": 6.515763370565218e-06, |
|
"loss": 2.3614, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.4251453488372093, |
|
"grad_norm": 1.101416582117341, |
|
"learning_rate": 6.504470455819651e-06, |
|
"loss": 2.3193, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.4258720930232558, |
|
"grad_norm": 1.1869346197022288, |
|
"learning_rate": 6.493169096142068e-06, |
|
"loss": 2.2734, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.4265988372093023, |
|
"grad_norm": 1.0719625987052026, |
|
"learning_rate": 6.481859354969549e-06, |
|
"loss": 2.2967, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.4273255813953488, |
|
"grad_norm": 1.2740863995681964, |
|
"learning_rate": 6.470541295786222e-06, |
|
"loss": 2.2977, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.42805232558139533, |
|
"grad_norm": 2.0881615265610054, |
|
"learning_rate": 6.4592149821229064e-06, |
|
"loss": 2.2487, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.42877906976744184, |
|
"grad_norm": 1.0918234165843568, |
|
"learning_rate": 6.447880477556757e-06, |
|
"loss": 2.2752, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.42950581395348836, |
|
"grad_norm": 1.0976921856780906, |
|
"learning_rate": 6.436537845710904e-06, |
|
"loss": 2.2932, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.43023255813953487, |
|
"grad_norm": 1.1941175241736834, |
|
"learning_rate": 6.425187150254097e-06, |
|
"loss": 2.3102, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.4309593023255814, |
|
"grad_norm": 1.0851777276937011, |
|
"learning_rate": 6.413828454900351e-06, |
|
"loss": 2.2651, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.4316860465116279, |
|
"grad_norm": 1.111314328918144, |
|
"learning_rate": 6.402461823408584e-06, |
|
"loss": 2.3142, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.4324127906976744, |
|
"grad_norm": 1.1107472084593226, |
|
"learning_rate": 6.391087319582264e-06, |
|
"loss": 2.3056, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4331395348837209, |
|
"grad_norm": 1.110937134108463, |
|
"learning_rate": 6.379705007269046e-06, |
|
"loss": 2.3458, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.43386627906976744, |
|
"grad_norm": 1.1301775944171741, |
|
"learning_rate": 6.368314950360416e-06, |
|
"loss": 2.3003, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.43459302325581395, |
|
"grad_norm": 1.0782893388635726, |
|
"learning_rate": 6.356917212791332e-06, |
|
"loss": 2.3394, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.43531976744186046, |
|
"grad_norm": 1.1952070508071906, |
|
"learning_rate": 6.3455118585398676e-06, |
|
"loss": 2.3156, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.436046511627907, |
|
"grad_norm": 1.1855496510040404, |
|
"learning_rate": 6.334098951626847e-06, |
|
"loss": 2.3322, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4367732558139535, |
|
"grad_norm": 1.1611150891834845, |
|
"learning_rate": 6.3226785561154914e-06, |
|
"loss": 2.3106, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 1.129860856742074, |
|
"learning_rate": 6.311250736111058e-06, |
|
"loss": 2.2959, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4382267441860465, |
|
"grad_norm": 1.1621204046893316, |
|
"learning_rate": 6.299815555760478e-06, |
|
"loss": 2.2952, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.438953488372093, |
|
"grad_norm": 1.0973622926564048, |
|
"learning_rate": 6.288373079251996e-06, |
|
"loss": 2.3415, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.43968023255813954, |
|
"grad_norm": 1.0808189653539233, |
|
"learning_rate": 6.276923370814815e-06, |
|
"loss": 2.2928, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.44040697674418605, |
|
"grad_norm": 1.1346845786046464, |
|
"learning_rate": 6.265466494718731e-06, |
|
"loss": 2.2576, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.44113372093023256, |
|
"grad_norm": 1.0957369183695937, |
|
"learning_rate": 6.254002515273775e-06, |
|
"loss": 2.2973, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.4418604651162791, |
|
"grad_norm": 1.1034874026636983, |
|
"learning_rate": 6.242531496829848e-06, |
|
"loss": 2.2827, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.4425872093023256, |
|
"grad_norm": 1.1697709003724293, |
|
"learning_rate": 6.231053503776363e-06, |
|
"loss": 2.3238, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.4433139534883721, |
|
"grad_norm": 1.040137222032865, |
|
"learning_rate": 6.219568600541886e-06, |
|
"loss": 2.2895, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4440406976744186, |
|
"grad_norm": 1.3641221427787078, |
|
"learning_rate": 6.208076851593768e-06, |
|
"loss": 2.3187, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.44476744186046513, |
|
"grad_norm": 1.1013994361963013, |
|
"learning_rate": 6.1965783214377895e-06, |
|
"loss": 2.2899, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.44549418604651164, |
|
"grad_norm": 1.3087669498751793, |
|
"learning_rate": 6.185073074617793e-06, |
|
"loss": 2.2882, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.44622093023255816, |
|
"grad_norm": 1.1024012017074452, |
|
"learning_rate": 6.173561175715323e-06, |
|
"loss": 2.2886, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.44694767441860467, |
|
"grad_norm": 1.198664497940916, |
|
"learning_rate": 6.1620426893492645e-06, |
|
"loss": 2.3531, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4476744186046512, |
|
"grad_norm": 1.1648828913400078, |
|
"learning_rate": 6.150517680175482e-06, |
|
"loss": 2.3145, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.4484011627906977, |
|
"grad_norm": 1.1859586096771217, |
|
"learning_rate": 6.13898621288645e-06, |
|
"loss": 2.2928, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.4491279069767442, |
|
"grad_norm": 1.074740378189526, |
|
"learning_rate": 6.127448352210894e-06, |
|
"loss": 2.3453, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.4498546511627907, |
|
"grad_norm": 1.1456985539298716, |
|
"learning_rate": 6.115904162913431e-06, |
|
"loss": 2.2879, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.45058139534883723, |
|
"grad_norm": 1.2351709659565, |
|
"learning_rate": 6.1043537097941985e-06, |
|
"loss": 2.2922, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.45130813953488375, |
|
"grad_norm": 1.1500337120485173, |
|
"learning_rate": 6.092797057688496e-06, |
|
"loss": 2.2747, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.45203488372093026, |
|
"grad_norm": 1.289050278474326, |
|
"learning_rate": 6.081234271466416e-06, |
|
"loss": 2.345, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.45276162790697677, |
|
"grad_norm": 1.279731812260382, |
|
"learning_rate": 6.0696654160324875e-06, |
|
"loss": 2.3395, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.45348837209302323, |
|
"grad_norm": 2.416509224462852, |
|
"learning_rate": 6.058090556325305e-06, |
|
"loss": 2.2773, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.45421511627906974, |
|
"grad_norm": 1.3184450912253118, |
|
"learning_rate": 6.046509757317168e-06, |
|
"loss": 2.2991, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.45494186046511625, |
|
"grad_norm": 1.20112258515423, |
|
"learning_rate": 6.034923084013713e-06, |
|
"loss": 2.2907, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.45566860465116277, |
|
"grad_norm": 1.2680453831355194, |
|
"learning_rate": 6.0233306014535505e-06, |
|
"loss": 2.3344, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.4563953488372093, |
|
"grad_norm": 1.077913847116916, |
|
"learning_rate": 6.0117323747079e-06, |
|
"loss": 2.2932, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.4571220930232558, |
|
"grad_norm": 1.2295931224219867, |
|
"learning_rate": 6.000128468880223e-06, |
|
"loss": 2.383, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.4578488372093023, |
|
"grad_norm": 1.1337008770515467, |
|
"learning_rate": 5.988518949105862e-06, |
|
"loss": 2.2843, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4585755813953488, |
|
"grad_norm": 1.1357848209286197, |
|
"learning_rate": 5.976903880551669e-06, |
|
"loss": 2.2913, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.45930232558139533, |
|
"grad_norm": 1.164197176327958, |
|
"learning_rate": 5.965283328415644e-06, |
|
"loss": 2.3219, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.46002906976744184, |
|
"grad_norm": 1.261751325053521, |
|
"learning_rate": 5.953657357926569e-06, |
|
"loss": 2.2947, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.46075581395348836, |
|
"grad_norm": 1.4417326298084916, |
|
"learning_rate": 5.942026034343636e-06, |
|
"loss": 2.3586, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.46148255813953487, |
|
"grad_norm": 1.0410453025295299, |
|
"learning_rate": 5.930389422956088e-06, |
|
"loss": 2.2798, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.4622093023255814, |
|
"grad_norm": 3.232269670369028, |
|
"learning_rate": 5.918747589082853e-06, |
|
"loss": 2.3494, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.4629360465116279, |
|
"grad_norm": 1.3842857905764918, |
|
"learning_rate": 5.907100598072166e-06, |
|
"loss": 2.3548, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.4636627906976744, |
|
"grad_norm": 1.1648587635712846, |
|
"learning_rate": 5.895448515301218e-06, |
|
"loss": 2.3716, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.4643895348837209, |
|
"grad_norm": 1.284402759492195, |
|
"learning_rate": 5.883791406175775e-06, |
|
"loss": 2.2782, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.46511627906976744, |
|
"grad_norm": 1.4101311890951338, |
|
"learning_rate": 5.872129336129821e-06, |
|
"loss": 2.2666, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46584302325581395, |
|
"grad_norm": 1.3443735682842992, |
|
"learning_rate": 5.860462370625189e-06, |
|
"loss": 2.2984, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.46656976744186046, |
|
"grad_norm": 1.3212822331398921, |
|
"learning_rate": 5.848790575151181e-06, |
|
"loss": 2.2868, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.467296511627907, |
|
"grad_norm": 1.0815673694776267, |
|
"learning_rate": 5.837114015224223e-06, |
|
"loss": 2.2562, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.4680232558139535, |
|
"grad_norm": 1.2786312590580617, |
|
"learning_rate": 5.8254327563874794e-06, |
|
"loss": 2.288, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 1.055564931514476, |
|
"learning_rate": 5.813746864210489e-06, |
|
"loss": 2.2965, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.4694767441860465, |
|
"grad_norm": 1.2680090066437342, |
|
"learning_rate": 5.8020564042888015e-06, |
|
"loss": 2.2938, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.470203488372093, |
|
"grad_norm": 1.5648914252969863, |
|
"learning_rate": 5.790361442243605e-06, |
|
"loss": 2.307, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.47093023255813954, |
|
"grad_norm": 1.0883293797548825, |
|
"learning_rate": 5.778662043721359e-06, |
|
"loss": 2.3145, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.47165697674418605, |
|
"grad_norm": 1.1178636928212937, |
|
"learning_rate": 5.766958274393428e-06, |
|
"loss": 2.3307, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.47238372093023256, |
|
"grad_norm": 1.2368388872888687, |
|
"learning_rate": 5.7552501999557065e-06, |
|
"loss": 2.2741, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4731104651162791, |
|
"grad_norm": 1.1358159404465558, |
|
"learning_rate": 5.743537886128258e-06, |
|
"loss": 2.2993, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.4738372093023256, |
|
"grad_norm": 1.1416099375776572, |
|
"learning_rate": 5.731821398654944e-06, |
|
"loss": 2.2632, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.4745639534883721, |
|
"grad_norm": 1.7153304480233373, |
|
"learning_rate": 5.72010080330305e-06, |
|
"loss": 2.2899, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.4752906976744186, |
|
"grad_norm": 1.1389469518056305, |
|
"learning_rate": 5.708376165862921e-06, |
|
"loss": 2.3123, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.47601744186046513, |
|
"grad_norm": 1.178285535354385, |
|
"learning_rate": 5.696647552147589e-06, |
|
"loss": 2.3086, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.47674418604651164, |
|
"grad_norm": 1.49285425667658, |
|
"learning_rate": 5.684915027992415e-06, |
|
"loss": 2.2843, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.47747093023255816, |
|
"grad_norm": 1.2217725848812606, |
|
"learning_rate": 5.673178659254698e-06, |
|
"loss": 2.3288, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.47819767441860467, |
|
"grad_norm": 1.1102124449809345, |
|
"learning_rate": 5.661438511813324e-06, |
|
"loss": 2.2965, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.4789244186046512, |
|
"grad_norm": 1.0889978851875848, |
|
"learning_rate": 5.64969465156839e-06, |
|
"loss": 2.2919, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.4796511627906977, |
|
"grad_norm": 1.137859070158329, |
|
"learning_rate": 5.637947144440832e-06, |
|
"loss": 2.307, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4803779069767442, |
|
"grad_norm": 1.0732988436678494, |
|
"learning_rate": 5.626196056372056e-06, |
|
"loss": 2.322, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.4811046511627907, |
|
"grad_norm": 1.0814609091912941, |
|
"learning_rate": 5.614441453323571e-06, |
|
"loss": 2.3412, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.48183139534883723, |
|
"grad_norm": 1.1099866069205775, |
|
"learning_rate": 5.6026834012766155e-06, |
|
"loss": 2.296, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.48255813953488375, |
|
"grad_norm": 1.3837080687086778, |
|
"learning_rate": 5.590921966231788e-06, |
|
"loss": 2.3248, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.48328488372093026, |
|
"grad_norm": 1.0810546987007168, |
|
"learning_rate": 5.579157214208675e-06, |
|
"loss": 2.2786, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.48401162790697677, |
|
"grad_norm": 1.1320880266123603, |
|
"learning_rate": 5.567389211245486e-06, |
|
"loss": 2.2809, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.48473837209302323, |
|
"grad_norm": 1.0770925548918713, |
|
"learning_rate": 5.555618023398671e-06, |
|
"loss": 2.267, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.48546511627906974, |
|
"grad_norm": 1.1382037438982808, |
|
"learning_rate": 5.5438437167425675e-06, |
|
"loss": 2.3134, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.48619186046511625, |
|
"grad_norm": 1.0790789851177505, |
|
"learning_rate": 5.532066357369012e-06, |
|
"loss": 2.297, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.48691860465116277, |
|
"grad_norm": 1.1191002552884677, |
|
"learning_rate": 5.52028601138698e-06, |
|
"loss": 2.2478, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4876453488372093, |
|
"grad_norm": 1.2027681208143934, |
|
"learning_rate": 5.508502744922212e-06, |
|
"loss": 2.2869, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.4883720930232558, |
|
"grad_norm": 1.0310047577721608, |
|
"learning_rate": 5.496716624116836e-06, |
|
"loss": 2.3195, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.4890988372093023, |
|
"grad_norm": 1.335901274130401, |
|
"learning_rate": 5.484927715129011e-06, |
|
"loss": 2.328, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.4898255813953488, |
|
"grad_norm": 1.1541404345602513, |
|
"learning_rate": 5.4731360841325405e-06, |
|
"loss": 2.2637, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.49055232558139533, |
|
"grad_norm": 1.3074779848860199, |
|
"learning_rate": 5.46134179731651e-06, |
|
"loss": 2.313, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.49127906976744184, |
|
"grad_norm": 1.1990590115578161, |
|
"learning_rate": 5.449544920884912e-06, |
|
"loss": 2.2838, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.49200581395348836, |
|
"grad_norm": 1.1453336133464966, |
|
"learning_rate": 5.437745521056272e-06, |
|
"loss": 2.309, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.49273255813953487, |
|
"grad_norm": 1.2139105440204063, |
|
"learning_rate": 5.425943664063284e-06, |
|
"loss": 2.3336, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.4934593023255814, |
|
"grad_norm": 1.2636132251369618, |
|
"learning_rate": 5.414139416152435e-06, |
|
"loss": 2.328, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.4941860465116279, |
|
"grad_norm": 1.208004638217351, |
|
"learning_rate": 5.402332843583631e-06, |
|
"loss": 2.2732, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4949127906976744, |
|
"grad_norm": 1.18632348684088, |
|
"learning_rate": 5.390524012629824e-06, |
|
"loss": 2.3023, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.4956395348837209, |
|
"grad_norm": 1.73169268404447, |
|
"learning_rate": 5.3787129895766484e-06, |
|
"loss": 2.3595, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.49636627906976744, |
|
"grad_norm": 1.22781474125336, |
|
"learning_rate": 5.3668998407220385e-06, |
|
"loss": 2.3425, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.49709302325581395, |
|
"grad_norm": 1.4004816370059154, |
|
"learning_rate": 5.3550846323758666e-06, |
|
"loss": 2.2719, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.49781976744186046, |
|
"grad_norm": 1.175122817240152, |
|
"learning_rate": 5.343267430859559e-06, |
|
"loss": 2.2725, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.498546511627907, |
|
"grad_norm": 1.2765509033619677, |
|
"learning_rate": 5.331448302505736e-06, |
|
"loss": 2.2324, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.4992732558139535, |
|
"grad_norm": 1.134665763858464, |
|
"learning_rate": 5.319627313657829e-06, |
|
"loss": 2.295, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.2622065199475687, |
|
"learning_rate": 5.3078045306697154e-06, |
|
"loss": 2.3023, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.5007267441860465, |
|
"grad_norm": 1.103313759369295, |
|
"learning_rate": 5.295980019905342e-06, |
|
"loss": 2.2914, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.501453488372093, |
|
"grad_norm": 1.0658556313608603, |
|
"learning_rate": 5.284153847738356e-06, |
|
"loss": 2.2972, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5021802325581395, |
|
"grad_norm": 1.1540767104639644, |
|
"learning_rate": 5.272326080551729e-06, |
|
"loss": 2.3199, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.502906976744186, |
|
"grad_norm": 1.0685698322062964, |
|
"learning_rate": 5.260496784737386e-06, |
|
"loss": 2.3586, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5036337209302325, |
|
"grad_norm": 1.075137770401511, |
|
"learning_rate": 5.248666026695835e-06, |
|
"loss": 2.3327, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.5043604651162791, |
|
"grad_norm": 1.2485248045497954, |
|
"learning_rate": 5.236833872835785e-06, |
|
"loss": 2.2846, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.5050872093023255, |
|
"grad_norm": 1.1866909415018816, |
|
"learning_rate": 5.2250003895737865e-06, |
|
"loss": 2.347, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5058139534883721, |
|
"grad_norm": 1.1733792890442774, |
|
"learning_rate": 5.213165643333851e-06, |
|
"loss": 2.2621, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5065406976744186, |
|
"grad_norm": 1.0957320197546991, |
|
"learning_rate": 5.201329700547077e-06, |
|
"loss": 2.319, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.5072674418604651, |
|
"grad_norm": 1.783025447548493, |
|
"learning_rate": 5.1894926276512824e-06, |
|
"loss": 2.331, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.5079941860465116, |
|
"grad_norm": 1.1016261702586139, |
|
"learning_rate": 5.177654491090627e-06, |
|
"loss": 2.2892, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.5087209302325582, |
|
"grad_norm": 1.3255266643870636, |
|
"learning_rate": 5.1658153573152405e-06, |
|
"loss": 2.3513, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5094476744186046, |
|
"grad_norm": 1.130548269660744, |
|
"learning_rate": 5.153975292780852e-06, |
|
"loss": 2.3004, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.5101744186046512, |
|
"grad_norm": 1.2122532789242535, |
|
"learning_rate": 5.1421343639484165e-06, |
|
"loss": 2.2811, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5109011627906976, |
|
"grad_norm": 1.1880143344840974, |
|
"learning_rate": 5.130292637283735e-06, |
|
"loss": 2.3121, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.5116279069767442, |
|
"grad_norm": 1.4083680998178665, |
|
"learning_rate": 5.118450179257091e-06, |
|
"loss": 2.2639, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.5123546511627907, |
|
"grad_norm": 1.2695136745113604, |
|
"learning_rate": 5.1066070563428736e-06, |
|
"loss": 2.2975, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5130813953488372, |
|
"grad_norm": 1.1487564964506674, |
|
"learning_rate": 5.0947633350192035e-06, |
|
"loss": 2.313, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.5138081395348837, |
|
"grad_norm": 1.2133446995418098, |
|
"learning_rate": 5.082919081767558e-06, |
|
"loss": 2.2599, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5145348837209303, |
|
"grad_norm": 1.060658390712619, |
|
"learning_rate": 5.071074363072403e-06, |
|
"loss": 2.3123, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5152616279069767, |
|
"grad_norm": 1.2238495475049125, |
|
"learning_rate": 5.059229245420819e-06, |
|
"loss": 2.307, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.5159883720930233, |
|
"grad_norm": 1.2300813441464897, |
|
"learning_rate": 5.047383795302119e-06, |
|
"loss": 2.3029, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5167151162790697, |
|
"grad_norm": 1.0721690467315328, |
|
"learning_rate": 5.035538079207488e-06, |
|
"loss": 2.2998, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.5174418604651163, |
|
"grad_norm": 1.0965555474718804, |
|
"learning_rate": 5.023692163629603e-06, |
|
"loss": 2.3067, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.5181686046511628, |
|
"grad_norm": 1.0914497068226068, |
|
"learning_rate": 5.01184611506226e-06, |
|
"loss": 2.3203, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5188953488372093, |
|
"grad_norm": 1.0861900942041762, |
|
"learning_rate": 5e-06, |
|
"loss": 2.3539, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5196220930232558, |
|
"grad_norm": 1.1817757691113775, |
|
"learning_rate": 4.988153884937742e-06, |
|
"loss": 2.323, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5203488372093024, |
|
"grad_norm": 1.285821525788281, |
|
"learning_rate": 4.9763078363703975e-06, |
|
"loss": 2.2366, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5210755813953488, |
|
"grad_norm": 1.2634506605635316, |
|
"learning_rate": 4.964461920792512e-06, |
|
"loss": 2.3212, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5218023255813954, |
|
"grad_norm": 1.0448269993418493, |
|
"learning_rate": 4.952616204697882e-06, |
|
"loss": 2.3133, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5225290697674418, |
|
"grad_norm": 1.0529467546593827, |
|
"learning_rate": 4.940770754579183e-06, |
|
"loss": 2.2669, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5232558139534884, |
|
"grad_norm": 1.0748992392149725, |
|
"learning_rate": 4.928925636927597e-06, |
|
"loss": 2.2731, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5239825581395349, |
|
"grad_norm": 1.2467135751557892, |
|
"learning_rate": 4.917080918232444e-06, |
|
"loss": 2.3027, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5247093023255814, |
|
"grad_norm": 1.3441280421823978, |
|
"learning_rate": 4.905236664980797e-06, |
|
"loss": 2.2939, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5254360465116279, |
|
"grad_norm": 1.148440473548479, |
|
"learning_rate": 4.893392943657127e-06, |
|
"loss": 2.2725, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5261627906976745, |
|
"grad_norm": 1.0956915790093777, |
|
"learning_rate": 4.88154982074291e-06, |
|
"loss": 2.278, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5268895348837209, |
|
"grad_norm": 1.0794100545360528, |
|
"learning_rate": 4.8697073627162675e-06, |
|
"loss": 2.344, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5276162790697675, |
|
"grad_norm": 1.0353531761976227, |
|
"learning_rate": 4.857865636051586e-06, |
|
"loss": 2.3008, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.528343023255814, |
|
"grad_norm": 1.2156865801913177, |
|
"learning_rate": 4.846024707219149e-06, |
|
"loss": 2.3188, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5290697674418605, |
|
"grad_norm": 1.112920842833822, |
|
"learning_rate": 4.834184642684762e-06, |
|
"loss": 2.3114, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.529796511627907, |
|
"grad_norm": 1.0831654219932778, |
|
"learning_rate": 4.822345508909376e-06, |
|
"loss": 2.2565, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5305232558139535, |
|
"grad_norm": 1.0230080869234226, |
|
"learning_rate": 4.810507372348721e-06, |
|
"loss": 2.2941, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.53125, |
|
"grad_norm": 1.1561917177674794, |
|
"learning_rate": 4.798670299452926e-06, |
|
"loss": 2.3085, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5319767441860465, |
|
"grad_norm": 1.0618213129594278, |
|
"learning_rate": 4.786834356666153e-06, |
|
"loss": 2.3032, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.532703488372093, |
|
"grad_norm": 1.1594693610124356, |
|
"learning_rate": 4.774999610426216e-06, |
|
"loss": 2.341, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5334302325581395, |
|
"grad_norm": 1.169649137664396, |
|
"learning_rate": 4.7631661271642185e-06, |
|
"loss": 2.2809, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.534156976744186, |
|
"grad_norm": 1.0549049328053892, |
|
"learning_rate": 4.751333973304166e-06, |
|
"loss": 2.333, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5348837209302325, |
|
"grad_norm": 1.0038525318185807, |
|
"learning_rate": 4.739503215262614e-06, |
|
"loss": 2.3116, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5356104651162791, |
|
"grad_norm": 1.31997708555104, |
|
"learning_rate": 4.727673919448271e-06, |
|
"loss": 2.2741, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5363372093023255, |
|
"grad_norm": 2.627288102246105, |
|
"learning_rate": 4.715846152261645e-06, |
|
"loss": 2.3146, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5370639534883721, |
|
"grad_norm": 1.2734425519555057, |
|
"learning_rate": 4.704019980094659e-06, |
|
"loss": 2.2962, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.5377906976744186, |
|
"grad_norm": 1.0854352730012788, |
|
"learning_rate": 4.692195469330286e-06, |
|
"loss": 2.3271, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5385174418604651, |
|
"grad_norm": 1.2912493479896627, |
|
"learning_rate": 4.680372686342173e-06, |
|
"loss": 2.3492, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5392441860465116, |
|
"grad_norm": 1.1007088516087424, |
|
"learning_rate": 4.668551697494265e-06, |
|
"loss": 2.2956, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5399709302325582, |
|
"grad_norm": 1.2218535001454547, |
|
"learning_rate": 4.656732569140441e-06, |
|
"loss": 2.3164, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5406976744186046, |
|
"grad_norm": 1.1620697376080404, |
|
"learning_rate": 4.644915367624134e-06, |
|
"loss": 2.3121, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.5414244186046512, |
|
"grad_norm": 1.0853929215895797, |
|
"learning_rate": 4.6331001592779615e-06, |
|
"loss": 2.2714, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5421511627906976, |
|
"grad_norm": 1.2845929065378492, |
|
"learning_rate": 4.621287010423353e-06, |
|
"loss": 2.2752, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5428779069767442, |
|
"grad_norm": 1.7105298123808783, |
|
"learning_rate": 4.609475987370177e-06, |
|
"loss": 2.226, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5436046511627907, |
|
"grad_norm": 1.089059131220579, |
|
"learning_rate": 4.597667156416371e-06, |
|
"loss": 2.2778, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5443313953488372, |
|
"grad_norm": 1.16904595629462, |
|
"learning_rate": 4.585860583847566e-06, |
|
"loss": 2.2746, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5450581395348837, |
|
"grad_norm": 1.0790766039012891, |
|
"learning_rate": 4.5740563359367164e-06, |
|
"loss": 2.3374, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5457848837209303, |
|
"grad_norm": 1.1928040971714162, |
|
"learning_rate": 4.562254478943729e-06, |
|
"loss": 2.295, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.5465116279069767, |
|
"grad_norm": 1.0780584166585039, |
|
"learning_rate": 4.550455079115091e-06, |
|
"loss": 2.3019, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.5472383720930233, |
|
"grad_norm": 1.2031883158838685, |
|
"learning_rate": 4.53865820268349e-06, |
|
"loss": 2.3214, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.5479651162790697, |
|
"grad_norm": 1.165919792008399, |
|
"learning_rate": 4.52686391586746e-06, |
|
"loss": 2.3162, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.5486918604651163, |
|
"grad_norm": 1.0614325726147842, |
|
"learning_rate": 4.51507228487099e-06, |
|
"loss": 2.3383, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.5494186046511628, |
|
"grad_norm": 1.6562096024577802, |
|
"learning_rate": 4.503283375883165e-06, |
|
"loss": 2.2749, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.5501453488372093, |
|
"grad_norm": 1.032325987733994, |
|
"learning_rate": 4.49149725507779e-06, |
|
"loss": 2.3248, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.5508720930232558, |
|
"grad_norm": 1.1761618418955344, |
|
"learning_rate": 4.479713988613021e-06, |
|
"loss": 2.3352, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.5515988372093024, |
|
"grad_norm": 1.4407183902299339, |
|
"learning_rate": 4.467933642630989e-06, |
|
"loss": 2.3129, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.5523255813953488, |
|
"grad_norm": 1.7485899959244522, |
|
"learning_rate": 4.456156283257433e-06, |
|
"loss": 2.2731, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5530523255813954, |
|
"grad_norm": 1.067156350753222, |
|
"learning_rate": 4.44438197660133e-06, |
|
"loss": 2.2612, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.5537790697674418, |
|
"grad_norm": 1.1944089605557895, |
|
"learning_rate": 4.432610788754517e-06, |
|
"loss": 2.3038, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.5545058139534884, |
|
"grad_norm": 1.0693481833494738, |
|
"learning_rate": 4.420842785791326e-06, |
|
"loss": 2.3026, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.5552325581395349, |
|
"grad_norm": 1.038170071261918, |
|
"learning_rate": 4.409078033768214e-06, |
|
"loss": 2.3048, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.5559593023255814, |
|
"grad_norm": 1.0195295401284605, |
|
"learning_rate": 4.397316598723385e-06, |
|
"loss": 2.3144, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.5566860465116279, |
|
"grad_norm": 1.0481555681045303, |
|
"learning_rate": 4.3855585466764305e-06, |
|
"loss": 2.3215, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.5574127906976745, |
|
"grad_norm": 1.0610663400067764, |
|
"learning_rate": 4.373803943627946e-06, |
|
"loss": 2.2499, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.5581395348837209, |
|
"grad_norm": 1.3765746707741413, |
|
"learning_rate": 4.362052855559171e-06, |
|
"loss": 2.3205, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.5588662790697675, |
|
"grad_norm": 1.3701617218217217, |
|
"learning_rate": 4.350305348431612e-06, |
|
"loss": 2.3042, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.559593023255814, |
|
"grad_norm": 1.4655827010991576, |
|
"learning_rate": 4.338561488186678e-06, |
|
"loss": 2.2732, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5603197674418605, |
|
"grad_norm": 1.269480197875066, |
|
"learning_rate": 4.326821340745304e-06, |
|
"loss": 2.3294, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.561046511627907, |
|
"grad_norm": 1.2015230300920634, |
|
"learning_rate": 4.315084972007587e-06, |
|
"loss": 2.3379, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.5617732558139535, |
|
"grad_norm": 1.6683359319886466, |
|
"learning_rate": 4.303352447852412e-06, |
|
"loss": 2.2795, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 1.145915220611224, |
|
"learning_rate": 4.291623834137082e-06, |
|
"loss": 2.3492, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.5632267441860465, |
|
"grad_norm": 1.0525245783287247, |
|
"learning_rate": 4.279899196696953e-06, |
|
"loss": 2.2973, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.563953488372093, |
|
"grad_norm": 1.0792534481764795, |
|
"learning_rate": 4.268178601345057e-06, |
|
"loss": 2.3456, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.5646802325581395, |
|
"grad_norm": 1.0722001996118922, |
|
"learning_rate": 4.256462113871741e-06, |
|
"loss": 2.3002, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.565406976744186, |
|
"grad_norm": 1.1005919176139587, |
|
"learning_rate": 4.2447498000442935e-06, |
|
"loss": 2.2957, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.5661337209302325, |
|
"grad_norm": 1.0155813164835708, |
|
"learning_rate": 4.233041725606573e-06, |
|
"loss": 2.3771, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.5668604651162791, |
|
"grad_norm": 1.1740941879042222, |
|
"learning_rate": 4.2213379562786406e-06, |
|
"loss": 2.3246, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5675872093023255, |
|
"grad_norm": 1.0820477727645355, |
|
"learning_rate": 4.209638557756396e-06, |
|
"loss": 2.306, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.5683139534883721, |
|
"grad_norm": 1.184123545414512, |
|
"learning_rate": 4.1979435957111984e-06, |
|
"loss": 2.3486, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.5690406976744186, |
|
"grad_norm": 1.2273527031302494, |
|
"learning_rate": 4.186253135789511e-06, |
|
"loss": 2.3706, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.5697674418604651, |
|
"grad_norm": 2.5274481337597683, |
|
"learning_rate": 4.1745672436125205e-06, |
|
"loss": 2.2692, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.5704941860465116, |
|
"grad_norm": 1.1293062589962266, |
|
"learning_rate": 4.162885984775777e-06, |
|
"loss": 2.3382, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.5712209302325582, |
|
"grad_norm": 1.1904762121908412, |
|
"learning_rate": 4.15120942484882e-06, |
|
"loss": 2.2992, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.5719476744186046, |
|
"grad_norm": 1.0604891438581503, |
|
"learning_rate": 4.139537629374814e-06, |
|
"loss": 2.2331, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.5726744186046512, |
|
"grad_norm": 1.9696331643707805, |
|
"learning_rate": 4.12787066387018e-06, |
|
"loss": 2.2814, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.5734011627906976, |
|
"grad_norm": 1.101600496243351, |
|
"learning_rate": 4.116208593824227e-06, |
|
"loss": 2.2422, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.5741279069767442, |
|
"grad_norm": 1.116699022328913, |
|
"learning_rate": 4.104551484698785e-06, |
|
"loss": 2.2757, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5748546511627907, |
|
"grad_norm": 1.1199526384176086, |
|
"learning_rate": 4.092899401927836e-06, |
|
"loss": 2.3383, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.5755813953488372, |
|
"grad_norm": 1.093552386009082, |
|
"learning_rate": 4.081252410917148e-06, |
|
"loss": 2.2934, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.5763081395348837, |
|
"grad_norm": 1.0487756756149464, |
|
"learning_rate": 4.069610577043912e-06, |
|
"loss": 2.2438, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.5770348837209303, |
|
"grad_norm": 1.1051452936093074, |
|
"learning_rate": 4.057973965656365e-06, |
|
"loss": 2.2804, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.5777616279069767, |
|
"grad_norm": 1.1164624106499657, |
|
"learning_rate": 4.046342642073433e-06, |
|
"loss": 2.282, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.5784883720930233, |
|
"grad_norm": 0.9908772046288108, |
|
"learning_rate": 4.034716671584357e-06, |
|
"loss": 2.2756, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.5792151162790697, |
|
"grad_norm": 1.0456373042936773, |
|
"learning_rate": 4.0230961194483325e-06, |
|
"loss": 2.3089, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.5799418604651163, |
|
"grad_norm": 1.0795700462534907, |
|
"learning_rate": 4.01148105089414e-06, |
|
"loss": 2.3303, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.5806686046511628, |
|
"grad_norm": 1.1053174271707678, |
|
"learning_rate": 3.999871531119779e-06, |
|
"loss": 2.3436, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.5813953488372093, |
|
"grad_norm": 1.1301776518164668, |
|
"learning_rate": 3.988267625292102e-06, |
|
"loss": 2.2661, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5821220930232558, |
|
"grad_norm": 1.172930982684841, |
|
"learning_rate": 3.976669398546451e-06, |
|
"loss": 2.345, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.5828488372093024, |
|
"grad_norm": 1.0890559009206928, |
|
"learning_rate": 3.9650769159862875e-06, |
|
"loss": 2.2633, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.5835755813953488, |
|
"grad_norm": 1.0775106721417829, |
|
"learning_rate": 3.9534902426828325e-06, |
|
"loss": 2.2898, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.5843023255813954, |
|
"grad_norm": 1.1586454869442395, |
|
"learning_rate": 3.941909443674696e-06, |
|
"loss": 2.2771, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.5850290697674418, |
|
"grad_norm": 1.176709888292826, |
|
"learning_rate": 3.930334583967514e-06, |
|
"loss": 2.2887, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.5857558139534884, |
|
"grad_norm": 1.0930311463479425, |
|
"learning_rate": 3.918765728533586e-06, |
|
"loss": 2.2662, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.5864825581395349, |
|
"grad_norm": 0.9869447591684988, |
|
"learning_rate": 3.907202942311506e-06, |
|
"loss": 2.2685, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.5872093023255814, |
|
"grad_norm": 1.1593303017123573, |
|
"learning_rate": 3.895646290205803e-06, |
|
"loss": 2.3404, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.5879360465116279, |
|
"grad_norm": 1.3958293581245043, |
|
"learning_rate": 3.884095837086571e-06, |
|
"loss": 2.3046, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.5886627906976745, |
|
"grad_norm": 1.0971451664444143, |
|
"learning_rate": 3.872551647789108e-06, |
|
"loss": 2.2717, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.5893895348837209, |
|
"grad_norm": 3.09870534328071, |
|
"learning_rate": 3.861013787113553e-06, |
|
"loss": 2.3797, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.5901162790697675, |
|
"grad_norm": 1.141704824594259, |
|
"learning_rate": 3.849482319824521e-06, |
|
"loss": 2.296, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.590843023255814, |
|
"grad_norm": 0.9859445781082979, |
|
"learning_rate": 3.837957310650738e-06, |
|
"loss": 2.3019, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.5915697674418605, |
|
"grad_norm": 1.0950233921797288, |
|
"learning_rate": 3.82643882428468e-06, |
|
"loss": 2.3029, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.592296511627907, |
|
"grad_norm": 1.0700692250622799, |
|
"learning_rate": 3.81492692538221e-06, |
|
"loss": 2.3141, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.5930232558139535, |
|
"grad_norm": 1.091662316980483, |
|
"learning_rate": 3.803421678562213e-06, |
|
"loss": 2.2995, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.59375, |
|
"grad_norm": 1.204369219463071, |
|
"learning_rate": 3.7919231484062334e-06, |
|
"loss": 2.2585, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.5944767441860465, |
|
"grad_norm": 1.0764274721400988, |
|
"learning_rate": 3.7804313994581143e-06, |
|
"loss": 2.2878, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.595203488372093, |
|
"grad_norm": 1.1663852917982485, |
|
"learning_rate": 3.7689464962236367e-06, |
|
"loss": 2.2805, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.5959302325581395, |
|
"grad_norm": 1.0119449535199592, |
|
"learning_rate": 3.757468503170153e-06, |
|
"loss": 2.3124, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.596656976744186, |
|
"grad_norm": 0.9879631073022755, |
|
"learning_rate": 3.7459974847262253e-06, |
|
"loss": 2.2971, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.5973837209302325, |
|
"grad_norm": 1.0980204844106818, |
|
"learning_rate": 3.734533505281269e-06, |
|
"loss": 2.3215, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.5981104651162791, |
|
"grad_norm": 1.1727982952700535, |
|
"learning_rate": 3.723076629185186e-06, |
|
"loss": 2.3073, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.5988372093023255, |
|
"grad_norm": 1.095773058831185, |
|
"learning_rate": 3.7116269207480055e-06, |
|
"loss": 2.3034, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.5995639534883721, |
|
"grad_norm": 1.1102423289772532, |
|
"learning_rate": 3.700184444239524e-06, |
|
"loss": 2.3286, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.6002906976744186, |
|
"grad_norm": 1.2168841679733824, |
|
"learning_rate": 3.6887492638889433e-06, |
|
"loss": 2.3142, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.6010174418604651, |
|
"grad_norm": 1.1239188965273867, |
|
"learning_rate": 3.677321443884509e-06, |
|
"loss": 2.3021, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.6017441860465116, |
|
"grad_norm": 1.0061388225584935, |
|
"learning_rate": 3.6659010483731543e-06, |
|
"loss": 2.2644, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.6024709302325582, |
|
"grad_norm": 1.0745214690142126, |
|
"learning_rate": 3.654488141460134e-06, |
|
"loss": 2.3161, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.6031976744186046, |
|
"grad_norm": 1.066456873356294, |
|
"learning_rate": 3.6430827872086694e-06, |
|
"loss": 2.3044, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6039244186046512, |
|
"grad_norm": 1.0201857786513382, |
|
"learning_rate": 3.6316850496395863e-06, |
|
"loss": 2.344, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.6046511627906976, |
|
"grad_norm": 1.3732462363703541, |
|
"learning_rate": 3.6202949927309555e-06, |
|
"loss": 2.3352, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.6053779069767442, |
|
"grad_norm": 1.0703274758634678, |
|
"learning_rate": 3.6089126804177373e-06, |
|
"loss": 2.283, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.6061046511627907, |
|
"grad_norm": 1.183872355394252, |
|
"learning_rate": 3.597538176591417e-06, |
|
"loss": 2.2443, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.6068313953488372, |
|
"grad_norm": 2.828409917772514, |
|
"learning_rate": 3.5861715450996505e-06, |
|
"loss": 2.3027, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6075581395348837, |
|
"grad_norm": 1.0453232535385137, |
|
"learning_rate": 3.5748128497459044e-06, |
|
"loss": 2.2397, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6082848837209303, |
|
"grad_norm": 1.0869450979441266, |
|
"learning_rate": 3.563462154289098e-06, |
|
"loss": 2.2687, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.6090116279069767, |
|
"grad_norm": 1.0621257836663385, |
|
"learning_rate": 3.5521195224432436e-06, |
|
"loss": 2.2794, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6097383720930233, |
|
"grad_norm": 1.0092312203378675, |
|
"learning_rate": 3.5407850178770944e-06, |
|
"loss": 2.334, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.6104651162790697, |
|
"grad_norm": 1.105493758604535, |
|
"learning_rate": 3.5294587042137796e-06, |
|
"loss": 2.318, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6111918604651163, |
|
"grad_norm": 1.1993001659775373, |
|
"learning_rate": 3.5181406450304536e-06, |
|
"loss": 2.2729, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.6119186046511628, |
|
"grad_norm": 1.0688907809651846, |
|
"learning_rate": 3.506830903857933e-06, |
|
"loss": 2.3048, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.6126453488372093, |
|
"grad_norm": 1.2797486907093922, |
|
"learning_rate": 3.49552954418035e-06, |
|
"loss": 2.3112, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6133720930232558, |
|
"grad_norm": 1.0551236479337645, |
|
"learning_rate": 3.484236629434783e-06, |
|
"loss": 2.2843, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.6140988372093024, |
|
"grad_norm": 1.0368752476104846, |
|
"learning_rate": 3.4729522230109103e-06, |
|
"loss": 2.275, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6148255813953488, |
|
"grad_norm": 1.2212850537214994, |
|
"learning_rate": 3.461676388250651e-06, |
|
"loss": 2.3211, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.6155523255813954, |
|
"grad_norm": 1.1497485865405142, |
|
"learning_rate": 3.4504091884478076e-06, |
|
"loss": 2.2402, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6162790697674418, |
|
"grad_norm": 1.0906549367951974, |
|
"learning_rate": 3.4391506868477153e-06, |
|
"loss": 2.2838, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.6170058139534884, |
|
"grad_norm": 1.1230941195693127, |
|
"learning_rate": 3.4279009466468825e-06, |
|
"loss": 2.3212, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.6177325581395349, |
|
"grad_norm": 1.4108103209541318, |
|
"learning_rate": 3.416660030992639e-06, |
|
"loss": 2.2671, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6184593023255814, |
|
"grad_norm": 3.170948816296744, |
|
"learning_rate": 3.405428002982779e-06, |
|
"loss": 2.3123, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.6191860465116279, |
|
"grad_norm": 1.1804134414107927, |
|
"learning_rate": 3.3942049256652093e-06, |
|
"loss": 2.3191, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6199127906976745, |
|
"grad_norm": 1.0937922010266585, |
|
"learning_rate": 3.3829908620375953e-06, |
|
"loss": 2.2751, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6206395348837209, |
|
"grad_norm": 1.1172481321734316, |
|
"learning_rate": 3.3717858750470046e-06, |
|
"loss": 2.2579, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.6213662790697675, |
|
"grad_norm": 1.1200023639589576, |
|
"learning_rate": 3.3605900275895565e-06, |
|
"loss": 2.2352, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.622093023255814, |
|
"grad_norm": 1.0708024845639486, |
|
"learning_rate": 3.349403382510068e-06, |
|
"loss": 2.2787, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6228197674418605, |
|
"grad_norm": 1.084888989250318, |
|
"learning_rate": 3.3382260026017027e-06, |
|
"loss": 2.232, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.623546511627907, |
|
"grad_norm": 1.268520820863664, |
|
"learning_rate": 3.3270579506056146e-06, |
|
"loss": 2.2854, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6242732558139535, |
|
"grad_norm": 1.0779562508013507, |
|
"learning_rate": 3.3158992892105975e-06, |
|
"loss": 2.3121, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 1.152571981786872, |
|
"learning_rate": 3.3047500810527343e-06, |
|
"loss": 2.2762, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6257267441860465, |
|
"grad_norm": 1.1127590575536852, |
|
"learning_rate": 3.2936103887150484e-06, |
|
"loss": 2.3208, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.626453488372093, |
|
"grad_norm": 1.1100878449841611, |
|
"learning_rate": 3.2824802747271424e-06, |
|
"loss": 2.3413, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6271802325581395, |
|
"grad_norm": 1.0795636271079854, |
|
"learning_rate": 3.271359801564858e-06, |
|
"loss": 2.2705, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.627906976744186, |
|
"grad_norm": 1.0960995727635903, |
|
"learning_rate": 3.2602490316499197e-06, |
|
"loss": 2.2201, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6286337209302325, |
|
"grad_norm": 2.9347541178607575, |
|
"learning_rate": 3.2491480273495847e-06, |
|
"loss": 2.3146, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6293604651162791, |
|
"grad_norm": 1.0617998945563392, |
|
"learning_rate": 3.2380568509762935e-06, |
|
"loss": 2.3156, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6300872093023255, |
|
"grad_norm": 1.0271455481558849, |
|
"learning_rate": 3.226975564787322e-06, |
|
"loss": 2.277, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6308139534883721, |
|
"grad_norm": 1.3002586280292399, |
|
"learning_rate": 3.215904230984428e-06, |
|
"loss": 2.3272, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6315406976744186, |
|
"grad_norm": 1.5574297719183472, |
|
"learning_rate": 3.204842911713506e-06, |
|
"loss": 2.3306, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.6322674418604651, |
|
"grad_norm": 1.1428523129407389, |
|
"learning_rate": 3.1937916690642356e-06, |
|
"loss": 2.3228, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6329941860465116, |
|
"grad_norm": 1.089861769979664, |
|
"learning_rate": 3.182750565069735e-06, |
|
"loss": 2.2114, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6337209302325582, |
|
"grad_norm": 1.1291307856061896, |
|
"learning_rate": 3.171719661706211e-06, |
|
"loss": 2.2489, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6344476744186046, |
|
"grad_norm": 1.101392448358028, |
|
"learning_rate": 3.1606990208926125e-06, |
|
"loss": 2.32, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6351744186046512, |
|
"grad_norm": 5.592420183479199, |
|
"learning_rate": 3.1496887044902815e-06, |
|
"loss": 2.293, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6359011627906976, |
|
"grad_norm": 1.2099630065498752, |
|
"learning_rate": 3.1386887743026083e-06, |
|
"loss": 2.3239, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.6366279069767442, |
|
"grad_norm": 1.165045093955686, |
|
"learning_rate": 3.127699292074683e-06, |
|
"loss": 2.2441, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.6373546511627907, |
|
"grad_norm": 1.088722434436471, |
|
"learning_rate": 3.1167203194929447e-06, |
|
"loss": 2.2861, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.6380813953488372, |
|
"grad_norm": 1.2492892755052187, |
|
"learning_rate": 3.1057519181848474e-06, |
|
"loss": 2.3617, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.6388081395348837, |
|
"grad_norm": 1.1658961078716712, |
|
"learning_rate": 3.0947941497184985e-06, |
|
"loss": 2.2811, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.6395348837209303, |
|
"grad_norm": 1.4870278439353821, |
|
"learning_rate": 3.0838470756023253e-06, |
|
"loss": 2.2888, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6402616279069767, |
|
"grad_norm": 1.103064716569564, |
|
"learning_rate": 3.0729107572847244e-06, |
|
"loss": 2.2916, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.6409883720930233, |
|
"grad_norm": 2.4560218782566583, |
|
"learning_rate": 3.0619852561537165e-06, |
|
"loss": 2.3084, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.6417151162790697, |
|
"grad_norm": 1.1742459127551743, |
|
"learning_rate": 3.0510706335366034e-06, |
|
"loss": 2.297, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.6424418604651163, |
|
"grad_norm": 1.0987221705498311, |
|
"learning_rate": 3.040166950699626e-06, |
|
"loss": 2.297, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.6431686046511628, |
|
"grad_norm": 1.071287309340485, |
|
"learning_rate": 3.0292742688476125e-06, |
|
"loss": 2.3008, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.6438953488372093, |
|
"grad_norm": 1.0886795715194904, |
|
"learning_rate": 3.018392649123645e-06, |
|
"loss": 2.2998, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.6446220930232558, |
|
"grad_norm": 1.0354258695320315, |
|
"learning_rate": 3.0075221526087083e-06, |
|
"loss": 2.2977, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.6453488372093024, |
|
"grad_norm": 1.2586098674126731, |
|
"learning_rate": 2.9966628403213528e-06, |
|
"loss": 2.3182, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.6460755813953488, |
|
"grad_norm": 1.0639891337581244, |
|
"learning_rate": 2.985814773217346e-06, |
|
"loss": 2.342, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.6468023255813954, |
|
"grad_norm": 1.0142278093049604, |
|
"learning_rate": 2.9749780121893366e-06, |
|
"loss": 2.3273, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6475290697674418, |
|
"grad_norm": 1.1300528337470748, |
|
"learning_rate": 2.964152618066508e-06, |
|
"loss": 2.2904, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.6482558139534884, |
|
"grad_norm": 1.0454947250521127, |
|
"learning_rate": 2.9533386516142402e-06, |
|
"loss": 2.2952, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.6489825581395349, |
|
"grad_norm": 1.0526378472042244, |
|
"learning_rate": 2.9425361735337655e-06, |
|
"loss": 2.2735, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.6497093023255814, |
|
"grad_norm": 1.080243091060958, |
|
"learning_rate": 2.93174524446183e-06, |
|
"loss": 2.2815, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.6504360465116279, |
|
"grad_norm": 1.088488914886554, |
|
"learning_rate": 2.920965924970352e-06, |
|
"loss": 2.2592, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.6511627906976745, |
|
"grad_norm": 1.1122754897361344, |
|
"learning_rate": 2.910198275566085e-06, |
|
"loss": 2.2884, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.6518895348837209, |
|
"grad_norm": 1.0528404554543893, |
|
"learning_rate": 2.899442356690271e-06, |
|
"loss": 2.3596, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.6526162790697675, |
|
"grad_norm": 1.0174816158118893, |
|
"learning_rate": 2.8886982287183092e-06, |
|
"loss": 2.3454, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.653343023255814, |
|
"grad_norm": 1.064816839778644, |
|
"learning_rate": 2.8779659519594173e-06, |
|
"loss": 2.2798, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.6540697674418605, |
|
"grad_norm": 1.1220550056919638, |
|
"learning_rate": 2.8672455866562797e-06, |
|
"loss": 2.3059, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.654796511627907, |
|
"grad_norm": 2.0980951298709227, |
|
"learning_rate": 2.8565371929847286e-06, |
|
"loss": 2.3817, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.6555232558139535, |
|
"grad_norm": 1.081746293600907, |
|
"learning_rate": 2.8458408310533948e-06, |
|
"loss": 2.3264, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"grad_norm": 1.7403806177324872, |
|
"learning_rate": 2.835156560903365e-06, |
|
"loss": 2.2865, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.6569767441860465, |
|
"grad_norm": 1.2218329122403868, |
|
"learning_rate": 2.824484442507863e-06, |
|
"loss": 2.3367, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.657703488372093, |
|
"grad_norm": 1.0595512053583265, |
|
"learning_rate": 2.813824535771892e-06, |
|
"loss": 2.3019, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.6584302325581395, |
|
"grad_norm": 1.071380698452199, |
|
"learning_rate": 2.803176900531915e-06, |
|
"loss": 2.3019, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.659156976744186, |
|
"grad_norm": 1.0825188857789525, |
|
"learning_rate": 2.7925415965555126e-06, |
|
"loss": 2.2516, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.6598837209302325, |
|
"grad_norm": 2.920822764065476, |
|
"learning_rate": 2.78191868354104e-06, |
|
"loss": 2.3098, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.6606104651162791, |
|
"grad_norm": 1.0283483488585925, |
|
"learning_rate": 2.771308221117309e-06, |
|
"loss": 2.2796, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.6613372093023255, |
|
"grad_norm": 1.108342894881162, |
|
"learning_rate": 2.760710268843234e-06, |
|
"loss": 2.2898, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6620639534883721, |
|
"grad_norm": 1.2927271073592739, |
|
"learning_rate": 2.7501248862075163e-06, |
|
"loss": 2.314, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.6627906976744186, |
|
"grad_norm": 1.0730365777748851, |
|
"learning_rate": 2.7395521326282913e-06, |
|
"loss": 2.2943, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.6635174418604651, |
|
"grad_norm": 1.053779281550719, |
|
"learning_rate": 2.7289920674528142e-06, |
|
"loss": 2.3405, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.6642441860465116, |
|
"grad_norm": 1.0399607086713838, |
|
"learning_rate": 2.718444749957109e-06, |
|
"loss": 2.3139, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.6649709302325582, |
|
"grad_norm": 1.0368906197250551, |
|
"learning_rate": 2.7079102393456503e-06, |
|
"loss": 2.3257, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.6656976744186046, |
|
"grad_norm": 0.9905272281268529, |
|
"learning_rate": 2.69738859475102e-06, |
|
"loss": 2.2996, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.6664244186046512, |
|
"grad_norm": 1.068690583432205, |
|
"learning_rate": 2.6868798752335867e-06, |
|
"loss": 2.3056, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.6671511627906976, |
|
"grad_norm": 1.0595936340940884, |
|
"learning_rate": 2.6763841397811576e-06, |
|
"loss": 2.2933, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.6678779069767442, |
|
"grad_norm": 1.0457724244809747, |
|
"learning_rate": 2.6659014473086665e-06, |
|
"loss": 2.2715, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.6686046511627907, |
|
"grad_norm": 1.063579439322238, |
|
"learning_rate": 2.655431856657833e-06, |
|
"loss": 2.3071, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.6693313953488372, |
|
"grad_norm": 1.1221698319624531, |
|
"learning_rate": 2.6449754265968263e-06, |
|
"loss": 2.3282, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.6700581395348837, |
|
"grad_norm": 1.2349259940820547, |
|
"learning_rate": 2.6345322158199503e-06, |
|
"loss": 2.2832, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.6707848837209303, |
|
"grad_norm": 1.2893272128889726, |
|
"learning_rate": 2.6241022829473e-06, |
|
"loss": 2.2892, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.6715116279069767, |
|
"grad_norm": 1.0094392612404577, |
|
"learning_rate": 2.6136856865244443e-06, |
|
"loss": 2.2967, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.6722383720930233, |
|
"grad_norm": 1.0773647473259351, |
|
"learning_rate": 2.603282485022085e-06, |
|
"loss": 2.2939, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6729651162790697, |
|
"grad_norm": 1.1004366282951852, |
|
"learning_rate": 2.592892736835742e-06, |
|
"loss": 2.3022, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.6736918604651163, |
|
"grad_norm": 1.209320122969423, |
|
"learning_rate": 2.5825165002854124e-06, |
|
"loss": 2.3231, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.6744186046511628, |
|
"grad_norm": 1.1212132712601037, |
|
"learning_rate": 2.5721538336152553e-06, |
|
"loss": 2.307, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.6751453488372093, |
|
"grad_norm": 1.091673096600694, |
|
"learning_rate": 2.5618047949932524e-06, |
|
"loss": 2.2216, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.6758720930232558, |
|
"grad_norm": 1.0773898030071996, |
|
"learning_rate": 2.5514694425108968e-06, |
|
"loss": 2.2849, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6765988372093024, |
|
"grad_norm": 1.1276874114176263, |
|
"learning_rate": 2.5411478341828475e-06, |
|
"loss": 2.2728, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.6773255813953488, |
|
"grad_norm": 1.0562814394201008, |
|
"learning_rate": 2.5308400279466262e-06, |
|
"loss": 2.2255, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.6780523255813954, |
|
"grad_norm": 1.2901606524754075, |
|
"learning_rate": 2.5205460816622684e-06, |
|
"loss": 2.3445, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.6787790697674418, |
|
"grad_norm": 1.353015885378886, |
|
"learning_rate": 2.5102660531120204e-06, |
|
"loss": 2.2881, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.6795058139534884, |
|
"grad_norm": 1.032764934565106, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 2.252, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6802325581395349, |
|
"grad_norm": 1.0298405135241169, |
|
"learning_rate": 2.4897479799518797e-06, |
|
"loss": 2.2612, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.6809593023255814, |
|
"grad_norm": 1.0157695794323445, |
|
"learning_rate": 2.479510050514561e-06, |
|
"loss": 2.3077, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.6816860465116279, |
|
"grad_norm": 0.9944505259481613, |
|
"learning_rate": 2.469286269155848e-06, |
|
"loss": 2.2568, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.6824127906976745, |
|
"grad_norm": 1.2455482005978737, |
|
"learning_rate": 2.4590766932641353e-06, |
|
"loss": 2.2888, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.6831395348837209, |
|
"grad_norm": 1.0148580442463242, |
|
"learning_rate": 2.4488813801480717e-06, |
|
"loss": 2.2671, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6838662790697675, |
|
"grad_norm": 1.0647336115989892, |
|
"learning_rate": 2.438700387036253e-06, |
|
"loss": 2.2511, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.684593023255814, |
|
"grad_norm": 1.1906818174033162, |
|
"learning_rate": 2.4285337710768843e-06, |
|
"loss": 2.3056, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.6853197674418605, |
|
"grad_norm": 1.045245906467335, |
|
"learning_rate": 2.4183815893374817e-06, |
|
"loss": 2.2748, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.686046511627907, |
|
"grad_norm": 1.2179174742303633, |
|
"learning_rate": 2.4082438988045253e-06, |
|
"loss": 2.3691, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.6867732558139535, |
|
"grad_norm": 1.0939754996774316, |
|
"learning_rate": 2.3981207563831633e-06, |
|
"loss": 2.2668, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 1.0410688427836987, |
|
"learning_rate": 2.388012218896873e-06, |
|
"loss": 2.275, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.6882267441860465, |
|
"grad_norm": 1.0845983862848876, |
|
"learning_rate": 2.3779183430871596e-06, |
|
"loss": 2.2835, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.688953488372093, |
|
"grad_norm": 1.1420315061262278, |
|
"learning_rate": 2.3678391856132203e-06, |
|
"loss": 2.3258, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.6896802325581395, |
|
"grad_norm": 1.1645336497292325, |
|
"learning_rate": 2.3577748030516443e-06, |
|
"loss": 2.3346, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.690406976744186, |
|
"grad_norm": 1.1103520886088816, |
|
"learning_rate": 2.3477252518960764e-06, |
|
"loss": 2.2921, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6911337209302325, |
|
"grad_norm": 1.0138661713171222, |
|
"learning_rate": 2.3376905885569185e-06, |
|
"loss": 2.2863, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.6918604651162791, |
|
"grad_norm": 1.7747130578282735, |
|
"learning_rate": 2.3276708693609947e-06, |
|
"loss": 2.3005, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.6925872093023255, |
|
"grad_norm": 1.2120454663077505, |
|
"learning_rate": 2.3176661505512534e-06, |
|
"loss": 2.2774, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.6933139534883721, |
|
"grad_norm": 1.139318798147674, |
|
"learning_rate": 2.3076764882864333e-06, |
|
"loss": 2.3056, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.6940406976744186, |
|
"grad_norm": 1.0685661289451227, |
|
"learning_rate": 2.2977019386407653e-06, |
|
"loss": 2.2886, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.6947674418604651, |
|
"grad_norm": 1.1453862770943386, |
|
"learning_rate": 2.2877425576036467e-06, |
|
"loss": 2.3176, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.6954941860465116, |
|
"grad_norm": 1.0783921305254762, |
|
"learning_rate": 2.2777984010793264e-06, |
|
"loss": 2.4089, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.6962209302325582, |
|
"grad_norm": 1.071086466220267, |
|
"learning_rate": 2.267869524886603e-06, |
|
"loss": 2.2986, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.6969476744186046, |
|
"grad_norm": 1.0942023435068833, |
|
"learning_rate": 2.2579559847584924e-06, |
|
"loss": 2.2728, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.6976744186046512, |
|
"grad_norm": 1.2455437847883444, |
|
"learning_rate": 2.2480578363419363e-06, |
|
"loss": 2.2967, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6984011627906976, |
|
"grad_norm": 0.9971129904259833, |
|
"learning_rate": 2.238175135197471e-06, |
|
"loss": 2.3308, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.6991279069767442, |
|
"grad_norm": 1.070897723373824, |
|
"learning_rate": 2.2283079367989303e-06, |
|
"loss": 2.2437, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.6998546511627907, |
|
"grad_norm": 1.0863749709759165, |
|
"learning_rate": 2.2184562965331203e-06, |
|
"loss": 2.303, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.7005813953488372, |
|
"grad_norm": 1.0311502311512875, |
|
"learning_rate": 2.2086202696995248e-06, |
|
"loss": 2.2846, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.7013081395348837, |
|
"grad_norm": 1.0623408936048355, |
|
"learning_rate": 2.1987999115099763e-06, |
|
"loss": 2.2579, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7020348837209303, |
|
"grad_norm": 1.0153193816029904, |
|
"learning_rate": 2.1889952770883644e-06, |
|
"loss": 2.2474, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.7027616279069767, |
|
"grad_norm": 1.0440507740384226, |
|
"learning_rate": 2.17920642147031e-06, |
|
"loss": 2.2733, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.7034883720930233, |
|
"grad_norm": 1.1511064044424957, |
|
"learning_rate": 2.169433399602872e-06, |
|
"loss": 2.3142, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.7042151162790697, |
|
"grad_norm": 1.053669842381643, |
|
"learning_rate": 2.159676266344222e-06, |
|
"loss": 2.3098, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.7049418604651163, |
|
"grad_norm": 1.0471770020331883, |
|
"learning_rate": 2.1499350764633513e-06, |
|
"loss": 2.2895, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7056686046511628, |
|
"grad_norm": 1.4571732825727595, |
|
"learning_rate": 2.140209884639759e-06, |
|
"loss": 2.2841, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.7063953488372093, |
|
"grad_norm": 1.0880769462399786, |
|
"learning_rate": 2.130500745463136e-06, |
|
"loss": 2.2991, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.7071220930232558, |
|
"grad_norm": 1.037540633256132, |
|
"learning_rate": 2.120807713433074e-06, |
|
"loss": 2.2681, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.7078488372093024, |
|
"grad_norm": 1.0696892208948963, |
|
"learning_rate": 2.1111308429587446e-06, |
|
"loss": 2.2959, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.7085755813953488, |
|
"grad_norm": 1.0208444816267834, |
|
"learning_rate": 2.1014701883586087e-06, |
|
"loss": 2.2748, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7093023255813954, |
|
"grad_norm": 1.0418009858188437, |
|
"learning_rate": 2.091825803860095e-06, |
|
"loss": 2.2743, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.7100290697674418, |
|
"grad_norm": 1.0236164793641587, |
|
"learning_rate": 2.082197743599314e-06, |
|
"loss": 2.3051, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.7107558139534884, |
|
"grad_norm": 1.208585374069975, |
|
"learning_rate": 2.072586061620735e-06, |
|
"loss": 2.2571, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.7114825581395349, |
|
"grad_norm": 1.123554743123603, |
|
"learning_rate": 2.0629908118769004e-06, |
|
"loss": 2.2774, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.7122093023255814, |
|
"grad_norm": 1.070269970918954, |
|
"learning_rate": 2.0534120482281087e-06, |
|
"loss": 2.3054, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7129360465116279, |
|
"grad_norm": 1.0520946805260354, |
|
"learning_rate": 2.043849824442124e-06, |
|
"loss": 2.3158, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7136627906976745, |
|
"grad_norm": 1.0634540989868737, |
|
"learning_rate": 2.034304194193861e-06, |
|
"loss": 2.2118, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.7143895348837209, |
|
"grad_norm": 1.0132991218437926, |
|
"learning_rate": 2.024775211065098e-06, |
|
"loss": 2.2696, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.7151162790697675, |
|
"grad_norm": 1.018246298526723, |
|
"learning_rate": 2.0152629285441668e-06, |
|
"loss": 2.248, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.715843023255814, |
|
"grad_norm": 1.1097222649464207, |
|
"learning_rate": 2.0057674000256556e-06, |
|
"loss": 2.3706, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7165697674418605, |
|
"grad_norm": 1.1636566819216903, |
|
"learning_rate": 1.996288678810105e-06, |
|
"loss": 2.3096, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.717296511627907, |
|
"grad_norm": 1.1627061516985389, |
|
"learning_rate": 1.9868268181037186e-06, |
|
"loss": 2.3061, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.7180232558139535, |
|
"grad_norm": 1.0306901524108303, |
|
"learning_rate": 1.9773818710180514e-06, |
|
"loss": 2.323, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.71875, |
|
"grad_norm": 1.0526076940743654, |
|
"learning_rate": 1.967953890569723e-06, |
|
"loss": 2.2851, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.7194767441860465, |
|
"grad_norm": 1.8419498783617658, |
|
"learning_rate": 1.958542929680117e-06, |
|
"loss": 2.2927, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.720203488372093, |
|
"grad_norm": 1.1109141166503629, |
|
"learning_rate": 1.9491490411750745e-06, |
|
"loss": 2.2877, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.7209302325581395, |
|
"grad_norm": 1.0479816810432738, |
|
"learning_rate": 1.9397722777846153e-06, |
|
"loss": 2.291, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.721656976744186, |
|
"grad_norm": 1.0459670031607213, |
|
"learning_rate": 1.9304126921426235e-06, |
|
"loss": 2.2751, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.7223837209302325, |
|
"grad_norm": 1.0525843890419861, |
|
"learning_rate": 1.921070336786568e-06, |
|
"loss": 2.3021, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.7231104651162791, |
|
"grad_norm": 1.0601706121307002, |
|
"learning_rate": 1.9117452641571934e-06, |
|
"loss": 2.3253, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7238372093023255, |
|
"grad_norm": 1.2357758164161554, |
|
"learning_rate": 1.9024375265982386e-06, |
|
"loss": 2.2932, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.7245639534883721, |
|
"grad_norm": 1.191064713641231, |
|
"learning_rate": 1.893147176356131e-06, |
|
"loss": 2.3273, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.7252906976744186, |
|
"grad_norm": 1.2241505412066669, |
|
"learning_rate": 1.8838742655797053e-06, |
|
"loss": 2.2872, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.7260174418604651, |
|
"grad_norm": 1.1096350635181376, |
|
"learning_rate": 1.8746188463198983e-06, |
|
"loss": 2.2533, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.7267441860465116, |
|
"grad_norm": 1.0810442272478031, |
|
"learning_rate": 1.865380970529469e-06, |
|
"loss": 2.2439, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7274709302325582, |
|
"grad_norm": 1.0611734339943502, |
|
"learning_rate": 1.8561606900626938e-06, |
|
"loss": 2.3247, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.7281976744186046, |
|
"grad_norm": 1.339243616520072, |
|
"learning_rate": 1.8469580566750911e-06, |
|
"loss": 2.2827, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.7289244186046512, |
|
"grad_norm": 1.1081621018474208, |
|
"learning_rate": 1.8377731220231144e-06, |
|
"loss": 2.3357, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.7296511627906976, |
|
"grad_norm": 1.0403890016606065, |
|
"learning_rate": 1.8286059376638748e-06, |
|
"loss": 2.283, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.7303779069767442, |
|
"grad_norm": 1.0630454548978983, |
|
"learning_rate": 1.8194565550548477e-06, |
|
"loss": 2.283, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.7311046511627907, |
|
"grad_norm": 1.00735081057976, |
|
"learning_rate": 1.810325025553578e-06, |
|
"loss": 2.2705, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.7318313953488372, |
|
"grad_norm": 1.0768984635152028, |
|
"learning_rate": 1.8012114004174048e-06, |
|
"loss": 2.2943, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.7325581395348837, |
|
"grad_norm": 1.126696129099283, |
|
"learning_rate": 1.7921157308031567e-06, |
|
"loss": 2.3179, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.7332848837209303, |
|
"grad_norm": 1.0160803840046975, |
|
"learning_rate": 1.7830380677668836e-06, |
|
"loss": 2.3228, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.7340116279069767, |
|
"grad_norm": 1.0796523647513225, |
|
"learning_rate": 1.7739784622635514e-06, |
|
"loss": 2.2307, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7347383720930233, |
|
"grad_norm": 1.0462192247450826, |
|
"learning_rate": 1.764936965146773e-06, |
|
"loss": 2.2933, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.7354651162790697, |
|
"grad_norm": 1.0331053296317911, |
|
"learning_rate": 1.7559136271685079e-06, |
|
"loss": 2.2922, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.7361918604651163, |
|
"grad_norm": 0.9519678790569631, |
|
"learning_rate": 1.746908498978791e-06, |
|
"loss": 2.2542, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.7369186046511628, |
|
"grad_norm": 1.0833415039732, |
|
"learning_rate": 1.7379216311254339e-06, |
|
"loss": 2.2388, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.7376453488372093, |
|
"grad_norm": 1.0619106386944168, |
|
"learning_rate": 1.7289530740537569e-06, |
|
"loss": 2.2501, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.7383720930232558, |
|
"grad_norm": 1.076815203497985, |
|
"learning_rate": 1.72000287810629e-06, |
|
"loss": 2.329, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.7390988372093024, |
|
"grad_norm": 1.1080069971444473, |
|
"learning_rate": 1.7110710935225055e-06, |
|
"loss": 2.2878, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.7398255813953488, |
|
"grad_norm": 1.1611753091102224, |
|
"learning_rate": 1.7021577704385218e-06, |
|
"loss": 2.2986, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.7405523255813954, |
|
"grad_norm": 1.017226365288529, |
|
"learning_rate": 1.6932629588868332e-06, |
|
"loss": 2.3679, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.7412790697674418, |
|
"grad_norm": 1.1889012837812845, |
|
"learning_rate": 1.6843867087960252e-06, |
|
"loss": 2.2629, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7420058139534884, |
|
"grad_norm": 1.059279124205967, |
|
"learning_rate": 1.6755290699904881e-06, |
|
"loss": 2.3013, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.7427325581395349, |
|
"grad_norm": 1.0182359613614196, |
|
"learning_rate": 1.6666900921901497e-06, |
|
"loss": 2.3037, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.7434593023255814, |
|
"grad_norm": 1.0912666761581755, |
|
"learning_rate": 1.6578698250101828e-06, |
|
"loss": 2.3042, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.7441860465116279, |
|
"grad_norm": 1.2496008211453598, |
|
"learning_rate": 1.6490683179607403e-06, |
|
"loss": 2.2539, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.7449127906976745, |
|
"grad_norm": 1.0449183892310232, |
|
"learning_rate": 1.6402856204466611e-06, |
|
"loss": 2.2656, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.7456395348837209, |
|
"grad_norm": 1.0109361865487376, |
|
"learning_rate": 1.6315217817672142e-06, |
|
"loss": 2.3545, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.7463662790697675, |
|
"grad_norm": 1.9222203824349149, |
|
"learning_rate": 1.6227768511157976e-06, |
|
"loss": 2.2807, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.747093023255814, |
|
"grad_norm": 1.015484674656179, |
|
"learning_rate": 1.6140508775796832e-06, |
|
"loss": 2.2515, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.7478197674418605, |
|
"grad_norm": 1.0079500670175678, |
|
"learning_rate": 1.6053439101397257e-06, |
|
"loss": 2.2943, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.748546511627907, |
|
"grad_norm": 1.0661148978290145, |
|
"learning_rate": 1.5966559976701e-06, |
|
"loss": 2.3096, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7492732558139535, |
|
"grad_norm": 1.0568451868453557, |
|
"learning_rate": 1.5879871889380155e-06, |
|
"loss": 2.3256, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.05938722775654, |
|
"learning_rate": 1.5793375326034539e-06, |
|
"loss": 2.3017, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.7507267441860465, |
|
"grad_norm": 1.0107621685449764, |
|
"learning_rate": 1.5707070772188843e-06, |
|
"loss": 2.3019, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.751453488372093, |
|
"grad_norm": 1.157820058838249, |
|
"learning_rate": 1.5620958712290023e-06, |
|
"loss": 2.245, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.7521802325581395, |
|
"grad_norm": 0.9901772476736681, |
|
"learning_rate": 1.5535039629704467e-06, |
|
"loss": 2.2744, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.752906976744186, |
|
"grad_norm": 1.4571372969257075, |
|
"learning_rate": 1.5449314006715394e-06, |
|
"loss": 2.3006, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.7536337209302325, |
|
"grad_norm": 1.0727364544563056, |
|
"learning_rate": 1.5363782324520033e-06, |
|
"loss": 2.3023, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.7543604651162791, |
|
"grad_norm": 1.051295726249027, |
|
"learning_rate": 1.5278445063227038e-06, |
|
"loss": 2.2896, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.7550872093023255, |
|
"grad_norm": 1.1272815731891954, |
|
"learning_rate": 1.5193302701853674e-06, |
|
"loss": 2.2988, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.7558139534883721, |
|
"grad_norm": 1.0502184723821115, |
|
"learning_rate": 1.5108355718323236e-06, |
|
"loss": 2.275, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7565406976744186, |
|
"grad_norm": 1.0583070742342378, |
|
"learning_rate": 1.502360458946232e-06, |
|
"loss": 2.2462, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.7572674418604651, |
|
"grad_norm": 1.0060059414695133, |
|
"learning_rate": 1.4939049790998095e-06, |
|
"loss": 2.3153, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.7579941860465116, |
|
"grad_norm": 1.158848162270058, |
|
"learning_rate": 1.4854691797555753e-06, |
|
"loss": 2.3023, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.7587209302325582, |
|
"grad_norm": 1.0656483573077637, |
|
"learning_rate": 1.4770531082655704e-06, |
|
"loss": 2.3101, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.7594476744186046, |
|
"grad_norm": 1.2449865044376767, |
|
"learning_rate": 1.4686568118711054e-06, |
|
"loss": 2.3007, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.7601744186046512, |
|
"grad_norm": 1.0262544542085497, |
|
"learning_rate": 1.4602803377024833e-06, |
|
"loss": 2.3016, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.7609011627906976, |
|
"grad_norm": 1.060293437802111, |
|
"learning_rate": 1.451923732778745e-06, |
|
"loss": 2.3011, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.7616279069767442, |
|
"grad_norm": 1.28456037123509, |
|
"learning_rate": 1.4435870440073968e-06, |
|
"loss": 2.2614, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.7623546511627907, |
|
"grad_norm": 1.0443395729679117, |
|
"learning_rate": 1.435270318184156e-06, |
|
"loss": 2.2908, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.7630813953488372, |
|
"grad_norm": 1.0490096593491272, |
|
"learning_rate": 1.4269736019926778e-06, |
|
"loss": 2.2691, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7638081395348837, |
|
"grad_norm": 1.4580699017767131, |
|
"learning_rate": 1.418696942004304e-06, |
|
"loss": 2.276, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.7645348837209303, |
|
"grad_norm": 1.0554824961663374, |
|
"learning_rate": 1.410440384677791e-06, |
|
"loss": 2.2815, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.7652616279069767, |
|
"grad_norm": 1.0445976304458051, |
|
"learning_rate": 1.4022039763590595e-06, |
|
"loss": 2.3219, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.7659883720930233, |
|
"grad_norm": 3.0454676377565466, |
|
"learning_rate": 1.3939877632809279e-06, |
|
"loss": 2.3021, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.7667151162790697, |
|
"grad_norm": 1.0944528530649524, |
|
"learning_rate": 1.3857917915628516e-06, |
|
"loss": 2.2589, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.7674418604651163, |
|
"grad_norm": 1.4188459634174018, |
|
"learning_rate": 1.3776161072106703e-06, |
|
"loss": 2.3215, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.7681686046511628, |
|
"grad_norm": 1.0431343233534842, |
|
"learning_rate": 1.369460756116342e-06, |
|
"loss": 2.2651, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.7688953488372093, |
|
"grad_norm": 1.1271722033081277, |
|
"learning_rate": 1.3613257840576954e-06, |
|
"loss": 2.2626, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.7696220930232558, |
|
"grad_norm": 0.9807905284115528, |
|
"learning_rate": 1.3532112366981598e-06, |
|
"loss": 2.2429, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.7703488372093024, |
|
"grad_norm": 1.0792539906176082, |
|
"learning_rate": 1.3451171595865226e-06, |
|
"loss": 2.2441, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.7710755813953488, |
|
"grad_norm": 1.014599603945714, |
|
"learning_rate": 1.3370435981566622e-06, |
|
"loss": 2.3012, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.7718023255813954, |
|
"grad_norm": 1.039382145166407, |
|
"learning_rate": 1.3289905977273027e-06, |
|
"loss": 2.2614, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.7725290697674418, |
|
"grad_norm": 1.110162802251219, |
|
"learning_rate": 1.3209582035017487e-06, |
|
"loss": 2.3009, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.7732558139534884, |
|
"grad_norm": 1.0268137021721464, |
|
"learning_rate": 1.312946460567644e-06, |
|
"loss": 2.2988, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.7739825581395349, |
|
"grad_norm": 0.9991099915776216, |
|
"learning_rate": 1.3049554138967052e-06, |
|
"loss": 2.2574, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.7747093023255814, |
|
"grad_norm": 1.096594932836595, |
|
"learning_rate": 1.2969851083444834e-06, |
|
"loss": 2.3029, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.7754360465116279, |
|
"grad_norm": 1.0331471723360561, |
|
"learning_rate": 1.2890355886500971e-06, |
|
"loss": 2.3349, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.7761627906976745, |
|
"grad_norm": 0.9989200321985331, |
|
"learning_rate": 1.2811068994359992e-06, |
|
"loss": 2.2743, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.7768895348837209, |
|
"grad_norm": 1.0334842215623463, |
|
"learning_rate": 1.273199085207706e-06, |
|
"loss": 2.2318, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.7776162790697675, |
|
"grad_norm": 1.1166669421482618, |
|
"learning_rate": 1.2653121903535653e-06, |
|
"loss": 2.3044, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.778343023255814, |
|
"grad_norm": 1.0544628135207288, |
|
"learning_rate": 1.257446259144494e-06, |
|
"loss": 2.3597, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.7790697674418605, |
|
"grad_norm": 1.0101121441444254, |
|
"learning_rate": 1.2496013357337416e-06, |
|
"loss": 2.2348, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.779796511627907, |
|
"grad_norm": 1.0856552266206294, |
|
"learning_rate": 1.2417774641566298e-06, |
|
"loss": 2.2915, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.7805232558139535, |
|
"grad_norm": 1.008999510135277, |
|
"learning_rate": 1.233974688330315e-06, |
|
"loss": 2.3178, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 0.9934991466078096, |
|
"learning_rate": 1.2261930520535403e-06, |
|
"loss": 2.2697, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.7819767441860465, |
|
"grad_norm": 1.1302730062371027, |
|
"learning_rate": 1.2184325990063822e-06, |
|
"loss": 2.313, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.782703488372093, |
|
"grad_norm": 1.0051323138300596, |
|
"learning_rate": 1.210693372750017e-06, |
|
"loss": 2.3108, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.7834302325581395, |
|
"grad_norm": 1.3727245547649582, |
|
"learning_rate": 1.202975416726464e-06, |
|
"loss": 2.2932, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.784156976744186, |
|
"grad_norm": 0.9962127663435182, |
|
"learning_rate": 1.1952787742583549e-06, |
|
"loss": 2.2928, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.7848837209302325, |
|
"grad_norm": 2.918506835943175, |
|
"learning_rate": 1.1876034885486764e-06, |
|
"loss": 2.2867, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7856104651162791, |
|
"grad_norm": 1.002517901009031, |
|
"learning_rate": 1.1799496026805413e-06, |
|
"loss": 2.3079, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.7863372093023255, |
|
"grad_norm": 1.0608837488625182, |
|
"learning_rate": 1.1723171596169353e-06, |
|
"loss": 2.2952, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.7870639534883721, |
|
"grad_norm": 1.0495070164322962, |
|
"learning_rate": 1.1647062022004845e-06, |
|
"loss": 2.2838, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.7877906976744186, |
|
"grad_norm": 0.9747665315176195, |
|
"learning_rate": 1.157116773153208e-06, |
|
"loss": 2.3577, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.7885174418604651, |
|
"grad_norm": 0.995930671706204, |
|
"learning_rate": 1.1495489150762851e-06, |
|
"loss": 2.3267, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.7892441860465116, |
|
"grad_norm": 1.0298574764276371, |
|
"learning_rate": 1.1420026704498077e-06, |
|
"loss": 2.2693, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.7899709302325582, |
|
"grad_norm": 1.3353782375384569, |
|
"learning_rate": 1.1344780816325512e-06, |
|
"loss": 2.2941, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.7906976744186046, |
|
"grad_norm": 1.0042512833949695, |
|
"learning_rate": 1.1269751908617277e-06, |
|
"loss": 2.2895, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.7914244186046512, |
|
"grad_norm": 1.0159794876634873, |
|
"learning_rate": 1.1194940402527566e-06, |
|
"loss": 2.3337, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.7921511627906976, |
|
"grad_norm": 1.7170986311408605, |
|
"learning_rate": 1.112034671799025e-06, |
|
"loss": 2.2687, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7928779069767442, |
|
"grad_norm": 1.0720092333346907, |
|
"learning_rate": 1.1045971273716476e-06, |
|
"loss": 2.2731, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.7936046511627907, |
|
"grad_norm": 1.0901546781468832, |
|
"learning_rate": 1.0971814487192429e-06, |
|
"loss": 2.2649, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.7943313953488372, |
|
"grad_norm": 1.0068271176988854, |
|
"learning_rate": 1.089787677467683e-06, |
|
"loss": 2.3544, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.7950581395348837, |
|
"grad_norm": 1.143554056516148, |
|
"learning_rate": 1.0824158551198783e-06, |
|
"loss": 2.3312, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.7957848837209303, |
|
"grad_norm": 1.0257503226390932, |
|
"learning_rate": 1.075066023055527e-06, |
|
"loss": 2.3307, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.7965116279069767, |
|
"grad_norm": 1.0864265176648018, |
|
"learning_rate": 1.0677382225308969e-06, |
|
"loss": 2.3023, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.7972383720930233, |
|
"grad_norm": 1.4336976428817494, |
|
"learning_rate": 1.0604324946785826e-06, |
|
"loss": 2.2618, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.7979651162790697, |
|
"grad_norm": 1.0165625672732432, |
|
"learning_rate": 1.0531488805072848e-06, |
|
"loss": 2.2923, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.7986918604651163, |
|
"grad_norm": 2.8438555195834705, |
|
"learning_rate": 1.0458874209015708e-06, |
|
"loss": 2.2758, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.7994186046511628, |
|
"grad_norm": 1.011889649770043, |
|
"learning_rate": 1.0386481566216532e-06, |
|
"loss": 2.2674, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8001453488372093, |
|
"grad_norm": 0.9698911337581984, |
|
"learning_rate": 1.0314311283031531e-06, |
|
"loss": 2.261, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.8008720930232558, |
|
"grad_norm": 1.0090528280127107, |
|
"learning_rate": 1.0242363764568808e-06, |
|
"loss": 2.2956, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.8015988372093024, |
|
"grad_norm": 1.213795376548453, |
|
"learning_rate": 1.0170639414685985e-06, |
|
"loss": 2.2651, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.8023255813953488, |
|
"grad_norm": 1.0633109792614825, |
|
"learning_rate": 1.0099138635988026e-06, |
|
"loss": 2.2556, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.8030523255813954, |
|
"grad_norm": 1.1236794950217257, |
|
"learning_rate": 1.0027861829824953e-06, |
|
"loss": 2.3028, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8037790697674418, |
|
"grad_norm": 1.0179145252465904, |
|
"learning_rate": 9.956809396289519e-07, |
|
"loss": 2.2882, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.8045058139534884, |
|
"grad_norm": 1.0440708463771915, |
|
"learning_rate": 9.885981734215094e-07, |
|
"loss": 2.2323, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.8052325581395349, |
|
"grad_norm": 1.001838305148324, |
|
"learning_rate": 9.815379241173295e-07, |
|
"loss": 2.2875, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.8059593023255814, |
|
"grad_norm": 1.0012314774080968, |
|
"learning_rate": 9.745002313471847e-07, |
|
"loss": 2.2784, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.8066860465116279, |
|
"grad_norm": 1.3260411675767552, |
|
"learning_rate": 9.67485134615232e-07, |
|
"loss": 2.2714, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8074127906976745, |
|
"grad_norm": 0.9786969103850816, |
|
"learning_rate": 9.60492673298794e-07, |
|
"loss": 2.2664, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.8081395348837209, |
|
"grad_norm": 1.5073197015028201, |
|
"learning_rate": 9.535228866481295e-07, |
|
"loss": 2.2279, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.8088662790697675, |
|
"grad_norm": 1.056086008875671, |
|
"learning_rate": 9.465758137862264e-07, |
|
"loss": 2.2881, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.809593023255814, |
|
"grad_norm": 1.0809504924319808, |
|
"learning_rate": 9.396514937085682e-07, |
|
"loss": 2.3032, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.8103197674418605, |
|
"grad_norm": 1.2486681539195614, |
|
"learning_rate": 9.327499652829292e-07, |
|
"loss": 2.3155, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.811046511627907, |
|
"grad_norm": 1.0213292959789695, |
|
"learning_rate": 9.258712672491416e-07, |
|
"loss": 2.2899, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8117732558139535, |
|
"grad_norm": 1.0461554742483217, |
|
"learning_rate": 9.190154382188921e-07, |
|
"loss": 2.3477, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 1.0675949468870904, |
|
"learning_rate": 9.121825166754927e-07, |
|
"loss": 2.2543, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.8132267441860465, |
|
"grad_norm": 2.530653282570627, |
|
"learning_rate": 9.053725409736752e-07, |
|
"loss": 2.3301, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.813953488372093, |
|
"grad_norm": 0.9984512358752262, |
|
"learning_rate": 8.98585549339368e-07, |
|
"loss": 2.3042, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8146802325581395, |
|
"grad_norm": 1.0213185627968009, |
|
"learning_rate": 8.918215798694879e-07, |
|
"loss": 2.2799, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.815406976744186, |
|
"grad_norm": 1.0613752567095163, |
|
"learning_rate": 8.850806705317183e-07, |
|
"loss": 2.2999, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8161337209302325, |
|
"grad_norm": 1.0310489044310347, |
|
"learning_rate": 8.783628591643056e-07, |
|
"loss": 2.3235, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8168604651162791, |
|
"grad_norm": 4.283745359968558, |
|
"learning_rate": 8.716681834758411e-07, |
|
"loss": 2.3211, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.8175872093023255, |
|
"grad_norm": 1.331778348277442, |
|
"learning_rate": 8.649966810450472e-07, |
|
"loss": 2.341, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.8183139534883721, |
|
"grad_norm": 1.0415010268575644, |
|
"learning_rate": 8.583483893205746e-07, |
|
"loss": 2.2896, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.8190406976744186, |
|
"grad_norm": 1.006456220083455, |
|
"learning_rate": 8.517233456207819e-07, |
|
"loss": 2.3029, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.8197674418604651, |
|
"grad_norm": 1.6510497568308151, |
|
"learning_rate": 8.451215871335355e-07, |
|
"loss": 2.308, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.8204941860465116, |
|
"grad_norm": 1.0156351391245906, |
|
"learning_rate": 8.38543150915993e-07, |
|
"loss": 2.3161, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.8212209302325582, |
|
"grad_norm": 1.0120097967166009, |
|
"learning_rate": 8.31988073894403e-07, |
|
"loss": 2.2806, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8219476744186046, |
|
"grad_norm": 0.9976328982359449, |
|
"learning_rate": 8.254563928638892e-07, |
|
"loss": 2.356, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.8226744186046512, |
|
"grad_norm": 0.9964751712960173, |
|
"learning_rate": 8.189481444882524e-07, |
|
"loss": 2.248, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.8234011627906976, |
|
"grad_norm": 1.233431757610957, |
|
"learning_rate": 8.124633652997571e-07, |
|
"loss": 2.2668, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.8241279069767442, |
|
"grad_norm": 1.0780894801654444, |
|
"learning_rate": 8.060020916989331e-07, |
|
"loss": 2.303, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.8248546511627907, |
|
"grad_norm": 1.122308399785914, |
|
"learning_rate": 7.995643599543645e-07, |
|
"loss": 2.3352, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.8255813953488372, |
|
"grad_norm": 1.0627182539952464, |
|
"learning_rate": 7.931502062024949e-07, |
|
"loss": 2.275, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.8263081395348837, |
|
"grad_norm": 1.126076955118742, |
|
"learning_rate": 7.86759666447412e-07, |
|
"loss": 2.2813, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.8270348837209303, |
|
"grad_norm": 1.4854164415590436, |
|
"learning_rate": 7.803927765606595e-07, |
|
"loss": 2.3282, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.8277616279069767, |
|
"grad_norm": 1.0570569678350674, |
|
"learning_rate": 7.740495722810271e-07, |
|
"loss": 2.3106, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.8284883720930233, |
|
"grad_norm": 0.9347752540270663, |
|
"learning_rate": 7.677300892143485e-07, |
|
"loss": 2.2337, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8292151162790697, |
|
"grad_norm": 0.9579877274387915, |
|
"learning_rate": 7.614343628333104e-07, |
|
"loss": 2.2958, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.8299418604651163, |
|
"grad_norm": 1.1249688280208123, |
|
"learning_rate": 7.55162428477243e-07, |
|
"loss": 2.2656, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.8306686046511628, |
|
"grad_norm": 0.9756167229918767, |
|
"learning_rate": 7.489143213519301e-07, |
|
"loss": 2.2924, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.8313953488372093, |
|
"grad_norm": 1.044572083475489, |
|
"learning_rate": 7.426900765294043e-07, |
|
"loss": 2.2677, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.8321220930232558, |
|
"grad_norm": 1.379796567891184, |
|
"learning_rate": 7.364897289477585e-07, |
|
"loss": 2.3265, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.8328488372093024, |
|
"grad_norm": 1.0695146040268584, |
|
"learning_rate": 7.303133134109391e-07, |
|
"loss": 2.3288, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.8335755813953488, |
|
"grad_norm": 1.016987223129461, |
|
"learning_rate": 7.241608645885629e-07, |
|
"loss": 2.2986, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.8343023255813954, |
|
"grad_norm": 1.1916192934614125, |
|
"learning_rate": 7.180324170157094e-07, |
|
"loss": 2.2874, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.8350290697674418, |
|
"grad_norm": 1.0839138661442358, |
|
"learning_rate": 7.119280050927407e-07, |
|
"loss": 2.265, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.8357558139534884, |
|
"grad_norm": 1.0304987400941559, |
|
"learning_rate": 7.058476630850935e-07, |
|
"loss": 2.2685, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8364825581395349, |
|
"grad_norm": 1.0321949762195342, |
|
"learning_rate": 6.997914251231036e-07, |
|
"loss": 2.3048, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.8372093023255814, |
|
"grad_norm": 1.0154567754879218, |
|
"learning_rate": 6.937593252017983e-07, |
|
"loss": 2.2805, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.8379360465116279, |
|
"grad_norm": 1.0838324246811337, |
|
"learning_rate": 6.87751397180716e-07, |
|
"loss": 2.2884, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.8386627906976745, |
|
"grad_norm": 1.0897256752726174, |
|
"learning_rate": 6.817676747837104e-07, |
|
"loss": 2.3718, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.8393895348837209, |
|
"grad_norm": 1.2095702202073741, |
|
"learning_rate": 6.758081915987669e-07, |
|
"loss": 2.2704, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.8401162790697675, |
|
"grad_norm": 1.0032802281153688, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 2.3228, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.840843023255814, |
|
"grad_norm": 1.2006946309581232, |
|
"learning_rate": 6.639620765365074e-07, |
|
"loss": 2.3023, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.8415697674418605, |
|
"grad_norm": 1.09168549344349, |
|
"learning_rate": 6.580755111541076e-07, |
|
"loss": 2.346, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.842296511627907, |
|
"grad_norm": 1.1606076057102177, |
|
"learning_rate": 6.522133179732271e-07, |
|
"loss": 2.371, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.8430232558139535, |
|
"grad_norm": 0.9745185257832802, |
|
"learning_rate": 6.463755298996799e-07, |
|
"loss": 2.241, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.84375, |
|
"grad_norm": 0.9926775523213874, |
|
"learning_rate": 6.405621797022848e-07, |
|
"loss": 2.2971, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.8444767441860465, |
|
"grad_norm": 1.0589270188129587, |
|
"learning_rate": 6.347733000126899e-07, |
|
"loss": 2.2864, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.845203488372093, |
|
"grad_norm": 1.1986749871451796, |
|
"learning_rate": 6.290089233251811e-07, |
|
"loss": 2.2959, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.8459302325581395, |
|
"grad_norm": 0.9904095687889044, |
|
"learning_rate": 6.232690819965065e-07, |
|
"loss": 2.2937, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.846656976744186, |
|
"grad_norm": 1.000092587187146, |
|
"learning_rate": 6.175538082456883e-07, |
|
"loss": 2.2807, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.8473837209302325, |
|
"grad_norm": 1.6370550141375801, |
|
"learning_rate": 6.118631341538489e-07, |
|
"loss": 2.2839, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.8481104651162791, |
|
"grad_norm": 1.007731213927358, |
|
"learning_rate": 6.061970916640236e-07, |
|
"loss": 2.2792, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.8488372093023255, |
|
"grad_norm": 1.052835634775134, |
|
"learning_rate": 6.005557125809896e-07, |
|
"loss": 2.2941, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.8495639534883721, |
|
"grad_norm": 1.2895324602216356, |
|
"learning_rate": 5.949390285710777e-07, |
|
"loss": 2.3294, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.8502906976744186, |
|
"grad_norm": 1.1015498862150495, |
|
"learning_rate": 5.893470711620036e-07, |
|
"loss": 2.2882, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8510174418604651, |
|
"grad_norm": 1.0003788574406323, |
|
"learning_rate": 5.837798717426846e-07, |
|
"loss": 2.2764, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.8517441860465116, |
|
"grad_norm": 1.024968554027419, |
|
"learning_rate": 5.782374615630682e-07, |
|
"loss": 2.228, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.8524709302325582, |
|
"grad_norm": 1.0094305384280167, |
|
"learning_rate": 5.727198717339511e-07, |
|
"loss": 2.2914, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.8531976744186046, |
|
"grad_norm": 1.0110519070659092, |
|
"learning_rate": 5.672271332268098e-07, |
|
"loss": 2.3431, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.8539244186046512, |
|
"grad_norm": 1.0385439058435382, |
|
"learning_rate": 5.617592768736269e-07, |
|
"loss": 2.3103, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.8546511627906976, |
|
"grad_norm": 1.0511462342322628, |
|
"learning_rate": 5.563163333667098e-07, |
|
"loss": 2.2852, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.8553779069767442, |
|
"grad_norm": 1.0132897307989803, |
|
"learning_rate": 5.508983332585316e-07, |
|
"loss": 2.2611, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.8561046511627907, |
|
"grad_norm": 0.9985229317455512, |
|
"learning_rate": 5.455053069615456e-07, |
|
"loss": 2.3396, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.8568313953488372, |
|
"grad_norm": 1.2719782958329457, |
|
"learning_rate": 5.401372847480285e-07, |
|
"loss": 2.2947, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.8575581395348837, |
|
"grad_norm": 1.1276835003016656, |
|
"learning_rate": 5.347942967498965e-07, |
|
"loss": 2.3052, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8582848837209303, |
|
"grad_norm": 4.384284810043298, |
|
"learning_rate": 5.294763729585484e-07, |
|
"loss": 2.2757, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.8590116279069767, |
|
"grad_norm": 1.2941565270254776, |
|
"learning_rate": 5.241835432246888e-07, |
|
"loss": 2.2744, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.8597383720930233, |
|
"grad_norm": 0.9811407369818785, |
|
"learning_rate": 5.18915837258166e-07, |
|
"loss": 2.2824, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.8604651162790697, |
|
"grad_norm": 0.978937747602062, |
|
"learning_rate": 5.136732846278003e-07, |
|
"loss": 2.2379, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.8611918604651163, |
|
"grad_norm": 1.1398240380059452, |
|
"learning_rate": 5.084559147612244e-07, |
|
"loss": 2.3125, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.8619186046511628, |
|
"grad_norm": 0.9982485781718631, |
|
"learning_rate": 5.032637569447091e-07, |
|
"loss": 2.3173, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.8626453488372093, |
|
"grad_norm": 1.1061089964328414, |
|
"learning_rate": 4.980968403230097e-07, |
|
"loss": 2.301, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.8633720930232558, |
|
"grad_norm": 1.0805494456177815, |
|
"learning_rate": 4.929551938991945e-07, |
|
"loss": 2.2888, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.8640988372093024, |
|
"grad_norm": 1.1308532612444147, |
|
"learning_rate": 4.87838846534483e-07, |
|
"loss": 2.2963, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.8648255813953488, |
|
"grad_norm": 1.1810215565297821, |
|
"learning_rate": 4.827478269480895e-07, |
|
"loss": 2.2896, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8655523255813954, |
|
"grad_norm": 1.0037351932727745, |
|
"learning_rate": 4.776821637170525e-07, |
|
"loss": 2.3178, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.8662790697674418, |
|
"grad_norm": 1.1742864207537889, |
|
"learning_rate": 4.726418852760839e-07, |
|
"loss": 2.3462, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.8670058139534884, |
|
"grad_norm": 1.0390414759278597, |
|
"learning_rate": 4.6762701991740434e-07, |
|
"loss": 2.3178, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.8677325581395349, |
|
"grad_norm": 1.0152117439648225, |
|
"learning_rate": 4.626375957905821e-07, |
|
"loss": 2.3091, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.8684593023255814, |
|
"grad_norm": 1.0125484137726066, |
|
"learning_rate": 4.576736409023813e-07, |
|
"loss": 2.295, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.8691860465116279, |
|
"grad_norm": 1.0648811208027753, |
|
"learning_rate": 4.5273518311660103e-07, |
|
"loss": 2.311, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.8699127906976745, |
|
"grad_norm": 0.9969625476223827, |
|
"learning_rate": 4.4782225015391754e-07, |
|
"loss": 2.3062, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.8706395348837209, |
|
"grad_norm": 1.191266036217783, |
|
"learning_rate": 4.429348695917329e-07, |
|
"loss": 2.2864, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.8713662790697675, |
|
"grad_norm": 1.054591606203741, |
|
"learning_rate": 4.3807306886401555e-07, |
|
"loss": 2.2381, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.872093023255814, |
|
"grad_norm": 1.0919505023664373, |
|
"learning_rate": 4.3323687526115045e-07, |
|
"loss": 2.258, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8728197674418605, |
|
"grad_norm": 1.087148546984066, |
|
"learning_rate": 4.284263159297819e-07, |
|
"loss": 2.3222, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.873546511627907, |
|
"grad_norm": 6.580439059174034, |
|
"learning_rate": 4.2364141787266613e-07, |
|
"loss": 2.2518, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.8742732558139535, |
|
"grad_norm": 1.0455521942127206, |
|
"learning_rate": 4.1888220794851386e-07, |
|
"loss": 2.3222, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 1.0342987091911544, |
|
"learning_rate": 4.141487128718452e-07, |
|
"loss": 2.2385, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.8757267441860465, |
|
"grad_norm": 1.1782647809260987, |
|
"learning_rate": 4.0944095921283347e-07, |
|
"loss": 2.3029, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.876453488372093, |
|
"grad_norm": 1.0105311825679908, |
|
"learning_rate": 4.0475897339716466e-07, |
|
"loss": 2.2689, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.8771802325581395, |
|
"grad_norm": 1.1756077441855846, |
|
"learning_rate": 4.001027817058789e-07, |
|
"loss": 2.2578, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.877906976744186, |
|
"grad_norm": 1.1994916473407944, |
|
"learning_rate": 3.9547241027523164e-07, |
|
"loss": 2.359, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.8786337209302325, |
|
"grad_norm": 1.0065299853456233, |
|
"learning_rate": 3.908678850965425e-07, |
|
"loss": 2.2853, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.8793604651162791, |
|
"grad_norm": 0.995293307148518, |
|
"learning_rate": 3.862892320160483e-07, |
|
"loss": 2.2858, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8800872093023255, |
|
"grad_norm": 1.0258935554168493, |
|
"learning_rate": 3.8173647673476366e-07, |
|
"loss": 2.3548, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.8808139534883721, |
|
"grad_norm": 0.9956512980655323, |
|
"learning_rate": 3.7720964480832847e-07, |
|
"loss": 2.2886, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.8815406976744186, |
|
"grad_norm": 1.0464019346352542, |
|
"learning_rate": 3.727087616468739e-07, |
|
"loss": 2.2975, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.8822674418604651, |
|
"grad_norm": 1.0261367720153103, |
|
"learning_rate": 3.682338525148699e-07, |
|
"loss": 2.2455, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.8829941860465116, |
|
"grad_norm": 0.9784631495637669, |
|
"learning_rate": 3.6378494253099307e-07, |
|
"loss": 2.2994, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.8837209302325582, |
|
"grad_norm": 1.0561243830461962, |
|
"learning_rate": 3.5936205666797675e-07, |
|
"loss": 2.3172, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.8844476744186046, |
|
"grad_norm": 0.9866167353173949, |
|
"learning_rate": 3.549652197524783e-07, |
|
"loss": 2.2568, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.8851744186046512, |
|
"grad_norm": 1.7694336587594428, |
|
"learning_rate": 3.505944564649344e-07, |
|
"loss": 2.3109, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.8859011627906976, |
|
"grad_norm": 1.0237927443574126, |
|
"learning_rate": 3.462497913394258e-07, |
|
"loss": 2.2819, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.8866279069767442, |
|
"grad_norm": 1.0797737719641145, |
|
"learning_rate": 3.419312487635362e-07, |
|
"loss": 2.2423, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8873546511627907, |
|
"grad_norm": 1.0965412032340838, |
|
"learning_rate": 3.3763885297822153e-07, |
|
"loss": 2.294, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.8880813953488372, |
|
"grad_norm": 1.1707762135194628, |
|
"learning_rate": 3.333726280776656e-07, |
|
"loss": 2.3118, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.8888081395348837, |
|
"grad_norm": 1.312947277182204, |
|
"learning_rate": 3.2913259800915196e-07, |
|
"loss": 2.259, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.8895348837209303, |
|
"grad_norm": 1.0501531220783618, |
|
"learning_rate": 3.2491878657292643e-07, |
|
"loss": 2.2494, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.8902616279069767, |
|
"grad_norm": 0.9978642932087061, |
|
"learning_rate": 3.2073121742206117e-07, |
|
"loss": 2.2676, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.8909883720930233, |
|
"grad_norm": 1.0219935225349466, |
|
"learning_rate": 3.165699140623285e-07, |
|
"loss": 2.2609, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.8917151162790697, |
|
"grad_norm": 1.0271766170690235, |
|
"learning_rate": 3.1243489985206097e-07, |
|
"loss": 2.2444, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.8924418604651163, |
|
"grad_norm": 1.2162058838576948, |
|
"learning_rate": 3.0832619800202746e-07, |
|
"loss": 2.3607, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.8931686046511628, |
|
"grad_norm": 0.9647661066514175, |
|
"learning_rate": 3.0424383157529716e-07, |
|
"loss": 2.2938, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.8938953488372093, |
|
"grad_norm": 1.0359666318241527, |
|
"learning_rate": 3.001878234871147e-07, |
|
"loss": 2.2933, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8946220930232558, |
|
"grad_norm": 1.0260708734909725, |
|
"learning_rate": 2.961581965047672e-07, |
|
"loss": 2.2944, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.8953488372093024, |
|
"grad_norm": 1.9596558246449935, |
|
"learning_rate": 2.921549732474599e-07, |
|
"loss": 2.3171, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.8960755813953488, |
|
"grad_norm": 1.1149831837749224, |
|
"learning_rate": 2.8817817618618846e-07, |
|
"loss": 2.3445, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.8968023255813954, |
|
"grad_norm": 1.0325460985272938, |
|
"learning_rate": 2.842278276436128e-07, |
|
"loss": 2.2966, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.8975290697674418, |
|
"grad_norm": 1.0390227024813596, |
|
"learning_rate": 2.803039497939281e-07, |
|
"loss": 2.26, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.8982558139534884, |
|
"grad_norm": 1.091918329127377, |
|
"learning_rate": 2.7640656466274785e-07, |
|
"loss": 2.3169, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.8989825581395349, |
|
"grad_norm": 1.0489517864380382, |
|
"learning_rate": 2.7253569412697244e-07, |
|
"loss": 2.2924, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.8997093023255814, |
|
"grad_norm": 1.1270113218465792, |
|
"learning_rate": 2.686913599146723e-07, |
|
"loss": 2.305, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.9004360465116279, |
|
"grad_norm": 1.0077746037473514, |
|
"learning_rate": 2.648735836049615e-07, |
|
"loss": 2.2416, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.9011627906976745, |
|
"grad_norm": 1.0534517963417607, |
|
"learning_rate": 2.6108238662788057e-07, |
|
"loss": 2.2928, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9018895348837209, |
|
"grad_norm": 1.2397150258831577, |
|
"learning_rate": 2.573177902642726e-07, |
|
"loss": 2.3184, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.9026162790697675, |
|
"grad_norm": 1.1152532284797432, |
|
"learning_rate": 2.5357981564566647e-07, |
|
"loss": 2.3111, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.903343023255814, |
|
"grad_norm": 1.015507101048782, |
|
"learning_rate": 2.4986848375415653e-07, |
|
"loss": 2.2436, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.9040697674418605, |
|
"grad_norm": 1.0734114420916194, |
|
"learning_rate": 2.4618381542228565e-07, |
|
"loss": 2.3737, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.904796511627907, |
|
"grad_norm": 1.346656207028202, |
|
"learning_rate": 2.4252583133292927e-07, |
|
"loss": 2.2639, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9055232558139535, |
|
"grad_norm": 1.0354149154983616, |
|
"learning_rate": 2.3889455201917655e-07, |
|
"loss": 2.3014, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.90625, |
|
"grad_norm": 1.0294255073427006, |
|
"learning_rate": 2.3528999786421758e-07, |
|
"loss": 2.329, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.9069767441860465, |
|
"grad_norm": 1.0697056028935716, |
|
"learning_rate": 2.3171218910122695e-07, |
|
"loss": 2.3277, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.907703488372093, |
|
"grad_norm": 1.0427212290454575, |
|
"learning_rate": 2.2816114581325377e-07, |
|
"loss": 2.3078, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.9084302325581395, |
|
"grad_norm": 1.0376797552205965, |
|
"learning_rate": 2.2463688793310345e-07, |
|
"loss": 2.3154, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.909156976744186, |
|
"grad_norm": 1.0936357024502716, |
|
"learning_rate": 2.2113943524323167e-07, |
|
"loss": 2.3418, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 0.9098837209302325, |
|
"grad_norm": 1.0037611206384802, |
|
"learning_rate": 2.1766880737562833e-07, |
|
"loss": 2.2743, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.9106104651162791, |
|
"grad_norm": 1.086635907292096, |
|
"learning_rate": 2.1422502381171163e-07, |
|
"loss": 2.3162, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 0.9113372093023255, |
|
"grad_norm": 1.1145643232811908, |
|
"learning_rate": 2.1080810388221406e-07, |
|
"loss": 2.2704, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.9120639534883721, |
|
"grad_norm": 1.0598433068070068, |
|
"learning_rate": 2.0741806676707887e-07, |
|
"loss": 2.3319, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.9127906976744186, |
|
"grad_norm": 1.3127144244086706, |
|
"learning_rate": 2.0405493149534828e-07, |
|
"loss": 2.3116, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.9135174418604651, |
|
"grad_norm": 1.269702023739857, |
|
"learning_rate": 2.007187169450603e-07, |
|
"loss": 2.2136, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 0.9142441860465116, |
|
"grad_norm": 0.9803876917909695, |
|
"learning_rate": 1.9740944184313882e-07, |
|
"loss": 2.2767, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.9149709302325582, |
|
"grad_norm": 0.9509347439890067, |
|
"learning_rate": 1.941271247652915e-07, |
|
"loss": 2.2829, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 0.9156976744186046, |
|
"grad_norm": 0.9886083066522969, |
|
"learning_rate": 1.908717841359048e-07, |
|
"loss": 2.2567, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9164244186046512, |
|
"grad_norm": 1.128556701776484, |
|
"learning_rate": 1.8764343822793962e-07, |
|
"loss": 2.3189, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 0.9171511627906976, |
|
"grad_norm": 1.0214841774757928, |
|
"learning_rate": 1.8444210516283035e-07, |
|
"loss": 2.2686, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.9178779069767442, |
|
"grad_norm": 1.0384165232113651, |
|
"learning_rate": 1.8126780291038037e-07, |
|
"loss": 2.2953, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 0.9186046511627907, |
|
"grad_norm": 1.0239892896324345, |
|
"learning_rate": 1.7812054928866617e-07, |
|
"loss": 2.2896, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.9193313953488372, |
|
"grad_norm": 1.0049155639014276, |
|
"learning_rate": 1.7500036196392956e-07, |
|
"loss": 2.2505, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.9200581395348837, |
|
"grad_norm": 1.0166829246084337, |
|
"learning_rate": 1.7190725845048827e-07, |
|
"loss": 2.2876, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.9207848837209303, |
|
"grad_norm": 1.0121063072615553, |
|
"learning_rate": 1.688412561106284e-07, |
|
"loss": 2.2638, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 0.9215116279069767, |
|
"grad_norm": 1.0325016280932235, |
|
"learning_rate": 1.6580237215451378e-07, |
|
"loss": 2.2858, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.9222383720930233, |
|
"grad_norm": 1.1015868462586509, |
|
"learning_rate": 1.6279062364008446e-07, |
|
"loss": 2.3058, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 0.9229651162790697, |
|
"grad_norm": 1.0513722612177225, |
|
"learning_rate": 1.5980602747296513e-07, |
|
"loss": 2.2703, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9236918604651163, |
|
"grad_norm": 1.0097455230074164, |
|
"learning_rate": 1.5684860040636573e-07, |
|
"loss": 2.3194, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 0.9244186046511628, |
|
"grad_norm": 1.0351656666175164, |
|
"learning_rate": 1.5391835904099316e-07, |
|
"loss": 2.3516, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.9251453488372093, |
|
"grad_norm": 1.006940226866894, |
|
"learning_rate": 1.510153198249531e-07, |
|
"loss": 2.2536, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 0.9258720930232558, |
|
"grad_norm": 1.0120832517149825, |
|
"learning_rate": 1.4813949905365833e-07, |
|
"loss": 2.2966, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.9265988372093024, |
|
"grad_norm": 1.04037739337114, |
|
"learning_rate": 1.4529091286973994e-07, |
|
"loss": 2.3211, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.9273255813953488, |
|
"grad_norm": 1.0923715057949517, |
|
"learning_rate": 1.424695772629553e-07, |
|
"loss": 2.286, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.9280523255813954, |
|
"grad_norm": 1.0745241746743175, |
|
"learning_rate": 1.3967550807009677e-07, |
|
"loss": 2.3102, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 0.9287790697674418, |
|
"grad_norm": 1.2680690659745402, |
|
"learning_rate": 1.3690872097490481e-07, |
|
"loss": 2.3097, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.9295058139534884, |
|
"grad_norm": 0.98738554433098, |
|
"learning_rate": 1.3416923150798123e-07, |
|
"loss": 2.3177, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 0.9302325581395349, |
|
"grad_norm": 1.7610982138078959, |
|
"learning_rate": 1.3145705504669592e-07, |
|
"loss": 2.2221, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9309593023255814, |
|
"grad_norm": 1.0476925479595647, |
|
"learning_rate": 1.2877220681510927e-07, |
|
"loss": 2.273, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 0.9316860465116279, |
|
"grad_norm": 1.0107799113760905, |
|
"learning_rate": 1.261147018838782e-07, |
|
"loss": 2.2488, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.9324127906976745, |
|
"grad_norm": 1.1192751459025567, |
|
"learning_rate": 1.2348455517017855e-07, |
|
"loss": 2.2751, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 0.9331395348837209, |
|
"grad_norm": 0.9712048469205136, |
|
"learning_rate": 1.208817814376162e-07, |
|
"loss": 2.2985, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.9338662790697675, |
|
"grad_norm": 0.9681812041910587, |
|
"learning_rate": 1.1830639529614774e-07, |
|
"loss": 2.2898, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.934593023255814, |
|
"grad_norm": 1.031175408747568, |
|
"learning_rate": 1.157584112019966e-07, |
|
"loss": 2.2973, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.9353197674418605, |
|
"grad_norm": 1.080255118494038, |
|
"learning_rate": 1.1323784345757205e-07, |
|
"loss": 2.2881, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 0.936046511627907, |
|
"grad_norm": 1.1584490527865852, |
|
"learning_rate": 1.1074470621138866e-07, |
|
"loss": 2.2674, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.9367732558139535, |
|
"grad_norm": 0.9893069974580888, |
|
"learning_rate": 1.0827901345798919e-07, |
|
"loss": 2.3274, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 1.1609278902238676, |
|
"learning_rate": 1.0584077903786238e-07, |
|
"loss": 2.2954, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9382267441860465, |
|
"grad_norm": 1.1214652879688172, |
|
"learning_rate": 1.0343001663736918e-07, |
|
"loss": 2.2437, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 0.938953488372093, |
|
"grad_norm": 1.0314479692365157, |
|
"learning_rate": 1.0104673978866164e-07, |
|
"loss": 2.3459, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.9396802325581395, |
|
"grad_norm": 1.0202246090098164, |
|
"learning_rate": 9.869096186961025e-08, |
|
"loss": 2.3238, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 0.940406976744186, |
|
"grad_norm": 1.735524228594828, |
|
"learning_rate": 9.636269610372895e-08, |
|
"loss": 2.2979, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.9411337209302325, |
|
"grad_norm": 1.0231462789990604, |
|
"learning_rate": 9.406195556009745e-08, |
|
"loss": 2.3135, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.9418604651162791, |
|
"grad_norm": 1.0387601752172189, |
|
"learning_rate": 9.178875315329183e-08, |
|
"loss": 2.2836, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.9425872093023255, |
|
"grad_norm": 1.0455127776642161, |
|
"learning_rate": 8.954310164331015e-08, |
|
"loss": 2.3477, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 0.9433139534883721, |
|
"grad_norm": 1.5004649471635758, |
|
"learning_rate": 8.732501363550028e-08, |
|
"loss": 2.31, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.9440406976744186, |
|
"grad_norm": 1.0296843011644148, |
|
"learning_rate": 8.513450158049109e-08, |
|
"loss": 2.3116, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 0.9447674418604651, |
|
"grad_norm": 0.9799375953016403, |
|
"learning_rate": 8.29715777741208e-08, |
|
"loss": 2.2626, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9454941860465116, |
|
"grad_norm": 0.9922942907902778, |
|
"learning_rate": 8.08362543573682e-08, |
|
"loss": 2.2388, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 0.9462209302325582, |
|
"grad_norm": 2.6395611468739446, |
|
"learning_rate": 7.872854331628599e-08, |
|
"loss": 2.3387, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.9469476744186046, |
|
"grad_norm": 1.3509341255307576, |
|
"learning_rate": 7.664845648193087e-08, |
|
"loss": 2.2566, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 0.9476744186046512, |
|
"grad_norm": 1.081975825106032, |
|
"learning_rate": 7.459600553029966e-08, |
|
"loss": 2.2852, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.9484011627906976, |
|
"grad_norm": 1.0477686545335492, |
|
"learning_rate": 7.257120198226219e-08, |
|
"loss": 2.2782, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.9491279069767442, |
|
"grad_norm": 1.0147183696685629, |
|
"learning_rate": 7.057405720349853e-08, |
|
"loss": 2.2711, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.9498546511627907, |
|
"grad_norm": 0.9804356237239087, |
|
"learning_rate": 6.860458240443179e-08, |
|
"loss": 2.3472, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 0.9505813953488372, |
|
"grad_norm": 1.0073244406187998, |
|
"learning_rate": 6.666278864016884e-08, |
|
"loss": 2.2972, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.9513081395348837, |
|
"grad_norm": 1.0325826072792375, |
|
"learning_rate": 6.474868681043578e-08, |
|
"loss": 2.2552, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 0.9520348837209303, |
|
"grad_norm": 1.0796140386264204, |
|
"learning_rate": 6.286228765951807e-08, |
|
"loss": 2.2278, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9527616279069767, |
|
"grad_norm": 0.9903943158949445, |
|
"learning_rate": 6.100360177619946e-08, |
|
"loss": 2.3095, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 0.9534883720930233, |
|
"grad_norm": 1.0300867433621375, |
|
"learning_rate": 5.917263959370312e-08, |
|
"loss": 2.3375, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.9542151162790697, |
|
"grad_norm": 1.076918105492359, |
|
"learning_rate": 5.736941138963281e-08, |
|
"loss": 2.3038, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 0.9549418604651163, |
|
"grad_norm": 1.0114088652079647, |
|
"learning_rate": 5.559392728591517e-08, |
|
"loss": 2.3148, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.9556686046511628, |
|
"grad_norm": 1.1906333105918063, |
|
"learning_rate": 5.384619724874307e-08, |
|
"loss": 2.2539, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.9563953488372093, |
|
"grad_norm": 1.134623849472849, |
|
"learning_rate": 5.2126231088519e-08, |
|
"loss": 2.2944, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.9571220930232558, |
|
"grad_norm": 0.9992883943690529, |
|
"learning_rate": 5.0434038459801213e-08, |
|
"loss": 2.2755, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 0.9578488372093024, |
|
"grad_norm": 1.1453760819725112, |
|
"learning_rate": 4.876962886124936e-08, |
|
"loss": 2.3314, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.9585755813953488, |
|
"grad_norm": 1.0711339360870722, |
|
"learning_rate": 4.713301163556894e-08, |
|
"loss": 2.3663, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 0.9593023255813954, |
|
"grad_norm": 1.193135985144878, |
|
"learning_rate": 4.5524195969461895e-08, |
|
"loss": 2.3048, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9600290697674418, |
|
"grad_norm": 1.1033385441868588, |
|
"learning_rate": 4.394319089357335e-08, |
|
"loss": 2.3037, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 0.9607558139534884, |
|
"grad_norm": 1.0425755305320366, |
|
"learning_rate": 4.239000528244164e-08, |
|
"loss": 2.2908, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.9614825581395349, |
|
"grad_norm": 1.0043149458946938, |
|
"learning_rate": 4.086464785444777e-08, |
|
"loss": 2.3172, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 0.9622093023255814, |
|
"grad_norm": 0.9510526219043042, |
|
"learning_rate": 3.936712717176716e-08, |
|
"loss": 2.3454, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.9629360465116279, |
|
"grad_norm": 1.0242871931784736, |
|
"learning_rate": 3.7897451640321326e-08, |
|
"loss": 2.255, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.9636627906976745, |
|
"grad_norm": 1.032293949458567, |
|
"learning_rate": 3.645562950973014e-08, |
|
"loss": 2.2723, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.9643895348837209, |
|
"grad_norm": 1.0085818746357065, |
|
"learning_rate": 3.504166887326688e-08, |
|
"loss": 2.28, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 0.9651162790697675, |
|
"grad_norm": 1.0094863113312103, |
|
"learning_rate": 3.365557766781047e-08, |
|
"loss": 2.3334, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.965843023255814, |
|
"grad_norm": 1.0044401474819298, |
|
"learning_rate": 3.229736367380498e-08, |
|
"loss": 2.3234, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 0.9665697674418605, |
|
"grad_norm": 1.1918541717117412, |
|
"learning_rate": 3.0967034515211323e-08, |
|
"loss": 2.3161, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.967296511627907, |
|
"grad_norm": 0.9889890714491774, |
|
"learning_rate": 2.966459765946672e-08, |
|
"loss": 2.3153, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 0.9680232558139535, |
|
"grad_norm": 1.064404134607704, |
|
"learning_rate": 2.8390060417443632e-08, |
|
"loss": 2.2949, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.96875, |
|
"grad_norm": 0.9820533577158319, |
|
"learning_rate": 2.714342994340646e-08, |
|
"loss": 2.3176, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.9694767441860465, |
|
"grad_norm": 0.9869071666343803, |
|
"learning_rate": 2.592471323497381e-08, |
|
"loss": 2.2175, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.970203488372093, |
|
"grad_norm": 1.024509197686126, |
|
"learning_rate": 2.4733917133077378e-08, |
|
"loss": 2.3161, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.9709302325581395, |
|
"grad_norm": 1.0089119887260893, |
|
"learning_rate": 2.3571048321923694e-08, |
|
"loss": 2.2915, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.971656976744186, |
|
"grad_norm": 0.9995707439981476, |
|
"learning_rate": 2.2436113328958565e-08, |
|
"loss": 2.3017, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 0.9723837209302325, |
|
"grad_norm": 1.2909850946179122, |
|
"learning_rate": 2.1329118524827662e-08, |
|
"loss": 2.2576, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.9731104651162791, |
|
"grad_norm": 0.9839313923654567, |
|
"learning_rate": 2.0250070123342124e-08, |
|
"loss": 2.3013, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 0.9738372093023255, |
|
"grad_norm": 1.0289650966110175, |
|
"learning_rate": 1.9198974181444675e-08, |
|
"loss": 2.3018, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9745639534883721, |
|
"grad_norm": 1.2914117926481417, |
|
"learning_rate": 1.8175836599173545e-08, |
|
"loss": 2.3079, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 0.9752906976744186, |
|
"grad_norm": 1.0055706662475437, |
|
"learning_rate": 1.7180663119630846e-08, |
|
"loss": 2.2506, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.9760174418604651, |
|
"grad_norm": 0.9952235589773648, |
|
"learning_rate": 1.6213459328950355e-08, |
|
"loss": 2.2793, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 0.9767441860465116, |
|
"grad_norm": 1.1363778138841705, |
|
"learning_rate": 1.5274230656263656e-08, |
|
"loss": 2.3271, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.9774709302325582, |
|
"grad_norm": 1.0689170237371675, |
|
"learning_rate": 1.4362982373675171e-08, |
|
"loss": 2.3187, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.9781976744186046, |
|
"grad_norm": 1.0815995913125536, |
|
"learning_rate": 1.347971959622496e-08, |
|
"loss": 2.3334, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.9789244186046512, |
|
"grad_norm": 1.035545163434121, |
|
"learning_rate": 1.2624447281867625e-08, |
|
"loss": 2.3098, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 0.9796511627906976, |
|
"grad_norm": 1.0948187248544319, |
|
"learning_rate": 1.1797170231439004e-08, |
|
"loss": 2.3251, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.9803779069767442, |
|
"grad_norm": 1.046292294927679, |
|
"learning_rate": 1.0997893088632306e-08, |
|
"loss": 2.3078, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 0.9811046511627907, |
|
"grad_norm": 1.0300319397600337, |
|
"learning_rate": 1.0226620339969795e-08, |
|
"loss": 2.3033, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9818313953488372, |
|
"grad_norm": 0.9767087046357332, |
|
"learning_rate": 9.48335631477948e-09, |
|
"loss": 2.3278, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 0.9825581395348837, |
|
"grad_norm": 1.198857506546238, |
|
"learning_rate": 8.768105185170683e-09, |
|
"loss": 2.2652, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.9832848837209303, |
|
"grad_norm": 0.9863221166831615, |
|
"learning_rate": 8.080870966008513e-09, |
|
"loss": 2.3035, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 0.9840116279069767, |
|
"grad_norm": 0.9996635167228194, |
|
"learning_rate": 7.421657514893321e-09, |
|
"loss": 2.3307, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 0.9847383720930233, |
|
"grad_norm": 0.9743277323285772, |
|
"learning_rate": 6.79046853214016e-09, |
|
"loss": 2.2822, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.9854651162790697, |
|
"grad_norm": 1.025580657364137, |
|
"learning_rate": 6.187307560754363e-09, |
|
"loss": 2.3112, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.9861918604651163, |
|
"grad_norm": 1.0871552124899244, |
|
"learning_rate": 5.612177986414891e-09, |
|
"loss": 2.2338, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 0.9869186046511628, |
|
"grad_norm": 1.2139378980093414, |
|
"learning_rate": 5.065083037453234e-09, |
|
"loss": 2.2583, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 0.9876453488372093, |
|
"grad_norm": 1.1655655831929417, |
|
"learning_rate": 4.546025784837316e-09, |
|
"loss": 2.3364, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 0.9883720930232558, |
|
"grad_norm": 1.0020916790797725, |
|
"learning_rate": 4.055009142152066e-09, |
|
"loss": 2.258, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9890988372093024, |
|
"grad_norm": 1.0161785758267667, |
|
"learning_rate": 3.5920358655844312e-09, |
|
"loss": 2.2484, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 0.9898255813953488, |
|
"grad_norm": 0.9954194415297898, |
|
"learning_rate": 3.1571085539089384e-09, |
|
"loss": 2.2928, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 0.9905523255813954, |
|
"grad_norm": 0.9865483653721157, |
|
"learning_rate": 2.7502296484699374e-09, |
|
"loss": 2.2793, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 0.9912790697674418, |
|
"grad_norm": 1.0946412791341318, |
|
"learning_rate": 2.371401433170495e-09, |
|
"loss": 2.2972, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.9920058139534884, |
|
"grad_norm": 1.004980651503775, |
|
"learning_rate": 2.0206260344590724e-09, |
|
"loss": 2.3216, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.9927325581395349, |
|
"grad_norm": 1.0316396359422022, |
|
"learning_rate": 1.6979054213173141e-09, |
|
"loss": 2.2782, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 0.9934593023255814, |
|
"grad_norm": 1.3085920072304564, |
|
"learning_rate": 1.4032414052478348e-09, |
|
"loss": 2.2255, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 0.9941860465116279, |
|
"grad_norm": 1.045228547510724, |
|
"learning_rate": 1.136635640267003e-09, |
|
"loss": 2.3237, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.9949127906976745, |
|
"grad_norm": 1.3155145718130687, |
|
"learning_rate": 8.980896228932834e-10, |
|
"loss": 2.2275, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 0.9956395348837209, |
|
"grad_norm": 0.9921059811114996, |
|
"learning_rate": 6.876046921389102e-10, |
|
"loss": 2.2629, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9963662790697675, |
|
"grad_norm": 1.2788236393841776, |
|
"learning_rate": 5.051820295032262e-10, |
|
"loss": 2.2405, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 0.997093023255814, |
|
"grad_norm": 1.0501847646368694, |
|
"learning_rate": 3.508226589660213e-10, |
|
"loss": 2.3305, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.9978197674418605, |
|
"grad_norm": 1.1446929377734676, |
|
"learning_rate": 2.2452744698087114e-10, |
|
"loss": 2.3071, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 0.998546511627907, |
|
"grad_norm": 0.9948071557609979, |
|
"learning_rate": 1.2629710247180626e-10, |
|
"loss": 2.2705, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 0.9992732558139535, |
|
"grad_norm": 1.0555106775673517, |
|
"learning_rate": 5.613217682720606e-11, |
|
"loss": 2.2112, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.9820602357954474, |
|
"learning_rate": 1.4033063899243637e-11, |
|
"loss": 2.3398, |
|
"step": 1376 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1376, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 688, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0909863237176525e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|