|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 268,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0037313432835820895,
      "grad_norm": 11.718372519156443,
      "learning_rate": 2e-07,
      "loss": 1.5946,
      "step": 1
    },
    {
      "epoch": 0.007462686567164179,
      "grad_norm": 12.194885050328885,
      "learning_rate": 4e-07,
      "loss": 1.6052,
      "step": 2
    },
    {
      "epoch": 0.011194029850746268,
      "grad_norm": 13.01588928537949,
      "learning_rate": 6e-07,
      "loss": 1.7294,
      "step": 3
    },
    {
      "epoch": 0.014925373134328358,
      "grad_norm": 12.313961908592717,
      "learning_rate": 8e-07,
      "loss": 1.5217,
      "step": 4
    },
    {
      "epoch": 0.018656716417910446,
      "grad_norm": 12.37356816651012,
      "learning_rate": 1e-06,
      "loss": 1.3742,
      "step": 5
    },
    {
      "epoch": 0.022388059701492536,
      "grad_norm": 11.427627877521694,
      "learning_rate": 1.2e-06,
      "loss": 1.4112,
      "step": 6
    },
    {
      "epoch": 0.026119402985074626,
      "grad_norm": 10.518944089588336,
      "learning_rate": 1.4e-06,
      "loss": 1.6147,
      "step": 7
    },
    {
      "epoch": 0.029850746268656716,
      "grad_norm": 8.76984656973648,
      "learning_rate": 1.6e-06,
      "loss": 1.4108,
      "step": 8
    },
    {
      "epoch": 0.033582089552238806,
      "grad_norm": 8.084328772350803,
      "learning_rate": 1.8e-06,
      "loss": 1.2484,
      "step": 9
    },
    {
      "epoch": 0.03731343283582089,
      "grad_norm": 9.697158992831765,
      "learning_rate": 2e-06,
      "loss": 1.23,
      "step": 10
    },
    {
      "epoch": 0.041044776119402986,
      "grad_norm": 18.09050975452378,
      "learning_rate": 1.9999821640202585e-06,
      "loss": 1.2535,
      "step": 11
    },
    {
      "epoch": 0.04477611940298507,
      "grad_norm": 18.87482868101649,
      "learning_rate": 1.9999286567172775e-06,
      "loss": 1.7845,
      "step": 12
    },
    {
      "epoch": 0.048507462686567165,
      "grad_norm": 16.46811558730055,
      "learning_rate": 1.999839479999768e-06,
      "loss": 1.5637,
      "step": 13
    },
    {
      "epoch": 0.05223880597014925,
      "grad_norm": 17.76805068894177,
      "learning_rate": 1.999714637048838e-06,
      "loss": 1.3015,
      "step": 14
    },
    {
      "epoch": 0.055970149253731345,
      "grad_norm": 16.79626847324504,
      "learning_rate": 1.9995541323178804e-06,
      "loss": 1.6793,
      "step": 15
    },
    {
      "epoch": 0.05970149253731343,
      "grad_norm": 11.89468375626604,
      "learning_rate": 1.9993579715324135e-06,
      "loss": 1.3764,
      "step": 16
    },
    {
      "epoch": 0.06343283582089553,
      "grad_norm": 10.93288879427306,
      "learning_rate": 1.9991261616898766e-06,
      "loss": 1.3707,
      "step": 17
    },
    {
      "epoch": 0.06716417910447761,
      "grad_norm": 8.361062086748422,
      "learning_rate": 1.9988587110593807e-06,
      "loss": 1.6238,
      "step": 18
    },
    {
      "epoch": 0.0708955223880597,
      "grad_norm": 6.736084108094181,
      "learning_rate": 1.9985556291814147e-06,
      "loss": 1.2496,
      "step": 19
    },
    {
      "epoch": 0.07462686567164178,
      "grad_norm": 7.272816093776056,
      "learning_rate": 1.9982169268675023e-06,
      "loss": 1.5627,
      "step": 20
    },
    {
      "epoch": 0.07835820895522388,
      "grad_norm": 6.329740952982906,
      "learning_rate": 1.997842616199819e-06,
      "loss": 1.3453,
      "step": 21
    },
    {
      "epoch": 0.08208955223880597,
      "grad_norm": 4.520010885801565,
      "learning_rate": 1.99743271053076e-06,
      "loss": 1.5461,
      "step": 22
    },
    {
      "epoch": 0.08582089552238806,
      "grad_norm": 3.6042605956174354,
      "learning_rate": 1.9969872244824635e-06,
      "loss": 1.5243,
      "step": 23
    },
    {
      "epoch": 0.08955223880597014,
      "grad_norm": 6.07998924732389,
      "learning_rate": 1.99650617394629e-06,
      "loss": 1.1925,
      "step": 24
    },
    {
      "epoch": 0.09328358208955224,
      "grad_norm": 5.379400172850125,
      "learning_rate": 1.9959895760822544e-06,
      "loss": 1.3644,
      "step": 25
    },
    {
      "epoch": 0.09701492537313433,
      "grad_norm": 5.724766090503273,
      "learning_rate": 1.995437449318415e-06,
      "loss": 1.282,
      "step": 26
    },
    {
      "epoch": 0.10074626865671642,
      "grad_norm": 7.952440800639504,
      "learning_rate": 1.994849813350215e-06,
      "loss": 1.2719,
      "step": 27
    },
    {
      "epoch": 0.1044776119402985,
      "grad_norm": 5.5605547177462915,
      "learning_rate": 1.9942266891397812e-06,
      "loss": 1.4344,
      "step": 28
    },
    {
      "epoch": 0.10820895522388059,
      "grad_norm": 4.20729688038585,
      "learning_rate": 1.9935680989151754e-06,
      "loss": 1.4261,
      "step": 29
    },
    {
      "epoch": 0.11194029850746269,
      "grad_norm": 4.179661464504617,
      "learning_rate": 1.9928740661696007e-06,
      "loss": 1.7263,
      "step": 30
    },
    {
      "epoch": 0.11567164179104478,
      "grad_norm": 4.444811669926014,
      "learning_rate": 1.992144615660566e-06,
      "loss": 1.4733,
      "step": 31
    },
    {
      "epoch": 0.11940298507462686,
      "grad_norm": 4.028798590389647,
      "learning_rate": 1.9913797734089995e-06,
      "loss": 1.2016,
      "step": 32
    },
    {
      "epoch": 0.12313432835820895,
      "grad_norm": 3.667451061021437,
      "learning_rate": 1.990579566698323e-06,
      "loss": 1.3823,
      "step": 33
    },
    {
      "epoch": 0.12686567164179105,
      "grad_norm": 3.204493378117863,
      "learning_rate": 1.9897440240734786e-06,
      "loss": 1.1922,
      "step": 34
    },
    {
      "epoch": 0.13059701492537312,
      "grad_norm": 2.5157465668345225,
      "learning_rate": 1.9888731753399087e-06,
      "loss": 1.3169,
      "step": 35
    },
    {
      "epoch": 0.13432835820895522,
      "grad_norm": 4.013532823842619,
      "learning_rate": 1.9879670515624933e-06,
      "loss": 1.5473,
      "step": 36
    },
    {
      "epoch": 0.13805970149253732,
      "grad_norm": 4.443422336518452,
      "learning_rate": 1.9870256850644436e-06,
      "loss": 1.3413,
      "step": 37
    },
    {
      "epoch": 0.1417910447761194,
      "grad_norm": 3.8185223864854443,
      "learning_rate": 1.9860491094261476e-06,
      "loss": 1.3775,
      "step": 38
    },
    {
      "epoch": 0.1455223880597015,
      "grad_norm": 3.2374300805591356,
      "learning_rate": 1.9850373594839715e-06,
      "loss": 1.4237,
      "step": 39
    },
    {
      "epoch": 0.14925373134328357,
      "grad_norm": 3.4010570222238945,
      "learning_rate": 1.9839904713290183e-06,
      "loss": 1.3512,
      "step": 40
    },
    {
      "epoch": 0.15298507462686567,
      "grad_norm": 3.7069360522451036,
      "learning_rate": 1.9829084823058396e-06,
      "loss": 1.3539,
      "step": 41
    },
    {
      "epoch": 0.15671641791044777,
      "grad_norm": 4.189014809102456,
      "learning_rate": 1.9817914310111044e-06,
      "loss": 1.184,
      "step": 42
    },
    {
      "epoch": 0.16044776119402984,
      "grad_norm": 2.6617535657025764,
      "learning_rate": 1.980639357292221e-06,
      "loss": 0.9118,
      "step": 43
    },
    {
      "epoch": 0.16417910447761194,
      "grad_norm": 5.853386102660827,
      "learning_rate": 1.9794523022459164e-06,
      "loss": 1.2803,
      "step": 44
    },
    {
      "epoch": 0.16791044776119404,
      "grad_norm": 6.062445920092195,
      "learning_rate": 1.9782303082167703e-06,
      "loss": 1.1335,
      "step": 45
    },
    {
      "epoch": 0.17164179104477612,
      "grad_norm": 3.931954985578852,
      "learning_rate": 1.976973418795704e-06,
      "loss": 1.3316,
      "step": 46
    },
    {
      "epoch": 0.17537313432835822,
      "grad_norm": 4.463556977447332,
      "learning_rate": 1.9756816788184255e-06,
      "loss": 1.0166,
      "step": 47
    },
    {
      "epoch": 0.1791044776119403,
      "grad_norm": 4.622340592834071,
      "learning_rate": 1.974355134363832e-06,
      "loss": 1.3222,
      "step": 48
    },
    {
      "epoch": 0.1828358208955224,
      "grad_norm": 6.981137384760035,
      "learning_rate": 1.972993832752363e-06,
      "loss": 1.2864,
      "step": 49
    },
    {
      "epoch": 0.1865671641791045,
      "grad_norm": 7.538142483116736,
      "learning_rate": 1.9715978225443146e-06,
      "loss": 1.3298,
      "step": 50
    },
    {
      "epoch": 0.19029850746268656,
      "grad_norm": 5.378377013811346,
      "learning_rate": 1.970167153538106e-06,
      "loss": 1.6898,
      "step": 51
    },
    {
      "epoch": 0.19402985074626866,
      "grad_norm": 4.107675209526753,
      "learning_rate": 1.9687018767685044e-06,
      "loss": 1.2467,
      "step": 52
    },
    {
      "epoch": 0.19776119402985073,
      "grad_norm": 3.462570336890335,
      "learning_rate": 1.9672020445048035e-06,
      "loss": 1.2439,
      "step": 53
    },
    {
      "epoch": 0.20149253731343283,
      "grad_norm": 5.1132284991358565,
      "learning_rate": 1.9656677102489587e-06,
      "loss": 1.2553,
      "step": 54
    },
    {
      "epoch": 0.20522388059701493,
      "grad_norm": 6.340348167811004,
      "learning_rate": 1.964098928733679e-06,
      "loss": 1.1768,
      "step": 55
    },
    {
      "epoch": 0.208955223880597,
      "grad_norm": 3.9131654258908415,
      "learning_rate": 1.962495755920476e-06,
      "loss": 1.2787,
      "step": 56
    },
    {
      "epoch": 0.2126865671641791,
      "grad_norm": 4.822496966837534,
      "learning_rate": 1.9608582489976645e-06,
      "loss": 0.9751,
      "step": 57
    },
    {
      "epoch": 0.21641791044776118,
      "grad_norm": 6.830738771850188,
      "learning_rate": 1.959186466378326e-06,
      "loss": 1.1173,
      "step": 58
    },
    {
      "epoch": 0.22014925373134328,
      "grad_norm": 3.7173186685103716,
      "learning_rate": 1.9574804676982214e-06,
      "loss": 1.3968,
      "step": 59
    },
    {
      "epoch": 0.22388059701492538,
      "grad_norm": 2.826816507380959,
      "learning_rate": 1.955740313813667e-06,
      "loss": 1.2454,
      "step": 60
    },
    {
      "epoch": 0.22761194029850745,
      "grad_norm": 5.060694147656036,
      "learning_rate": 1.9539660667993617e-06,
      "loss": 1.2803,
      "step": 61
    },
    {
      "epoch": 0.23134328358208955,
      "grad_norm": 4.866896848059289,
      "learning_rate": 1.952157789946173e-06,
      "loss": 1.3675,
      "step": 62
    },
    {
      "epoch": 0.23507462686567165,
      "grad_norm": 3.6902361398869843,
      "learning_rate": 1.9503155477588792e-06,
      "loss": 1.3265,
      "step": 63
    },
    {
      "epoch": 0.23880597014925373,
      "grad_norm": 3.844263841899348,
      "learning_rate": 1.9484394059538696e-06,
      "loss": 1.1316,
      "step": 64
    },
    {
      "epoch": 0.24253731343283583,
      "grad_norm": 2.7586273990747094,
      "learning_rate": 1.9465294314567986e-06,
      "loss": 1.1227,
      "step": 65
    },
    {
      "epoch": 0.2462686567164179,
      "grad_norm": 4.486151113202917,
      "learning_rate": 1.9445856924001987e-06,
      "loss": 1.2286,
      "step": 66
    },
    {
      "epoch": 0.25,
      "grad_norm": 3.4213181164419453,
      "learning_rate": 1.9426082581210507e-06,
      "loss": 1.1066,
      "step": 67
    },
    {
      "epoch": 0.2537313432835821,
      "grad_norm": 3.2763985271498646,
      "learning_rate": 1.9405971991583107e-06,
      "loss": 1.1251,
      "step": 68
    },
    {
      "epoch": 0.2574626865671642,
      "grad_norm": 3.2345913669334014,
      "learning_rate": 1.9385525872503914e-06,
      "loss": 1.1556,
      "step": 69
    },
    {
      "epoch": 0.26119402985074625,
      "grad_norm": 3.106534312573296,
      "learning_rate": 1.9364744953326073e-06,
      "loss": 1.0577,
      "step": 70
    },
    {
      "epoch": 0.26492537313432835,
      "grad_norm": 2.9408023039448272,
      "learning_rate": 1.9343629975345684e-06,
      "loss": 0.9973,
      "step": 71
    },
    {
      "epoch": 0.26865671641791045,
      "grad_norm": 4.559495537184346,
      "learning_rate": 1.9322181691775386e-06,
      "loss": 1.2465,
      "step": 72
    },
    {
      "epoch": 0.27238805970149255,
      "grad_norm": 4.24529782366461,
      "learning_rate": 1.9300400867717483e-06,
      "loss": 1.0913,
      "step": 73
    },
    {
      "epoch": 0.27611940298507465,
      "grad_norm": 5.1030664391900595,
      "learning_rate": 1.9278288280136647e-06,
      "loss": 1.1773,
      "step": 74
    },
    {
      "epoch": 0.2798507462686567,
      "grad_norm": 2.266554727723586,
      "learning_rate": 1.9255844717832204e-06,
      "loss": 1.4612,
      "step": 75
    },
    {
      "epoch": 0.2835820895522388,
      "grad_norm": 3.4335407395713795,
      "learning_rate": 1.9233070981410005e-06,
      "loss": 0.9848,
      "step": 76
    },
    {
      "epoch": 0.2873134328358209,
      "grad_norm": 3.0840869155577066,
      "learning_rate": 1.9209967883253844e-06,
      "loss": 1.1614,
      "step": 77
    },
    {
      "epoch": 0.291044776119403,
      "grad_norm": 3.9174952308981648,
      "learning_rate": 1.9186536247496515e-06,
      "loss": 0.993,
      "step": 78
    },
    {
      "epoch": 0.2947761194029851,
      "grad_norm": 6.675359765928192,
      "learning_rate": 1.916277690999037e-06,
      "loss": 1.1993,
      "step": 79
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 3.3312539449544336,
      "learning_rate": 1.9138690718277538e-06,
      "loss": 1.1122,
      "step": 80
    },
    {
      "epoch": 0.30223880597014924,
      "grad_norm": 3.848892934397973,
      "learning_rate": 1.9114278531559673e-06,
      "loss": 1.2558,
      "step": 81
    },
    {
      "epoch": 0.30597014925373134,
      "grad_norm": 4.176454718168841,
      "learning_rate": 1.908954122066731e-06,
      "loss": 1.1981,
      "step": 82
    },
    {
      "epoch": 0.30970149253731344,
      "grad_norm": 2.73071312325315,
      "learning_rate": 1.9064479668028799e-06,
      "loss": 1.2421,
      "step": 83
    },
    {
      "epoch": 0.31343283582089554,
      "grad_norm": 3.195988999340381,
      "learning_rate": 1.903909476763883e-06,
      "loss": 1.1304,
      "step": 84
    },
    {
      "epoch": 0.31716417910447764,
      "grad_norm": 4.218603688955521,
      "learning_rate": 1.9013387425026548e-06,
      "loss": 1.1864,
      "step": 85
    },
    {
      "epoch": 0.3208955223880597,
      "grad_norm": 3.6984773161373514,
      "learning_rate": 1.8987358557223229e-06,
      "loss": 1.1586,
      "step": 86
    },
    {
      "epoch": 0.3246268656716418,
      "grad_norm": 3.92008728241735,
      "learning_rate": 1.8961009092729597e-06,
      "loss": 1.4377,
      "step": 87
    },
    {
      "epoch": 0.3283582089552239,
      "grad_norm": 3.891396254447255,
      "learning_rate": 1.8934339971482673e-06,
      "loss": 0.8258,
      "step": 88
    },
    {
      "epoch": 0.332089552238806,
      "grad_norm": 3.0403561652586593,
      "learning_rate": 1.8907352144822281e-06,
      "loss": 1.1502,
      "step": 89
    },
    {
      "epoch": 0.3358208955223881,
      "grad_norm": 4.226515814729617,
      "learning_rate": 1.8880046575457071e-06,
      "loss": 1.3202,
      "step": 90
    },
    {
      "epoch": 0.33955223880597013,
      "grad_norm": 3.1371524388050562,
      "learning_rate": 1.8852424237430213e-06,
      "loss": 1.0916,
      "step": 91
    },
    {
      "epoch": 0.34328358208955223,
      "grad_norm": 5.798409840680744,
      "learning_rate": 1.882448611608463e-06,
      "loss": 1.0295,
      "step": 92
    },
    {
      "epoch": 0.34701492537313433,
      "grad_norm": 3.20282503021626,
      "learning_rate": 1.8796233208027847e-06,
      "loss": 1.0562,
      "step": 93
    },
    {
      "epoch": 0.35074626865671643,
      "grad_norm": 4.8622829880049165,
      "learning_rate": 1.8767666521096466e-06,
      "loss": 1.3517,
      "step": 94
    },
    {
      "epoch": 0.35447761194029853,
      "grad_norm": 3.236061343689611,
      "learning_rate": 1.8738787074320176e-06,
      "loss": 1.3072,
      "step": 95
    },
    {
      "epoch": 0.3582089552238806,
      "grad_norm": 2.8290205291903834,
      "learning_rate": 1.8709595897885436e-06,
      "loss": 1.0689,
      "step": 96
    },
    {
      "epoch": 0.3619402985074627,
      "grad_norm": 9.248683748830997,
      "learning_rate": 1.8680094033098714e-06,
      "loss": 1.1408,
      "step": 97
    },
    {
      "epoch": 0.3656716417910448,
      "grad_norm": 3.351494222719439,
      "learning_rate": 1.865028253234933e-06,
      "loss": 1.0601,
      "step": 98
    },
    {
      "epoch": 0.3694029850746269,
      "grad_norm": 2.9558835746922982,
      "learning_rate": 1.8620162459071933e-06,
      "loss": 1.469,
      "step": 99
    },
    {
      "epoch": 0.373134328358209,
      "grad_norm": 4.188173168279777,
      "learning_rate": 1.8589734887708555e-06,
      "loss": 1.0811,
      "step": 100
    },
    {
      "epoch": 0.376865671641791,
      "grad_norm": 4.149900932370019,
      "learning_rate": 1.855900090367029e-06,
      "loss": 1.2524,
      "step": 101
    },
    {
      "epoch": 0.3805970149253731,
      "grad_norm": 3.4354588132333954,
      "learning_rate": 1.852796160329857e-06,
      "loss": 1.2036,
      "step": 102
    },
    {
      "epoch": 0.3843283582089552,
      "grad_norm": 7.008352254151579,
      "learning_rate": 1.8496618093826062e-06,
      "loss": 1.3326,
      "step": 103
    },
    {
      "epoch": 0.3880597014925373,
      "grad_norm": 4.57793539598067,
      "learning_rate": 1.8464971493337165e-06,
      "loss": 1.1313,
      "step": 104
    },
    {
      "epoch": 0.3917910447761194,
      "grad_norm": 5.023484484297041,
      "learning_rate": 1.843302293072813e-06,
      "loss": 1.1537,
      "step": 105
    },
    {
      "epoch": 0.39552238805970147,
      "grad_norm": 5.417491975498189,
      "learning_rate": 1.8400773545666786e-06,
      "loss": 1.1948,
      "step": 106
    },
    {
      "epoch": 0.39925373134328357,
      "grad_norm": 6.758289030774797,
      "learning_rate": 1.8368224488551895e-06,
      "loss": 1.4521,
      "step": 107
    },
    {
      "epoch": 0.40298507462686567,
      "grad_norm": 3.113115656591011,
      "learning_rate": 1.8335376920472096e-06,
      "loss": 1.3848,
      "step": 108
    },
    {
      "epoch": 0.40671641791044777,
      "grad_norm": 6.762254585306641,
      "learning_rate": 1.8302232013164516e-06,
      "loss": 1.157,
      "step": 109
    },
    {
      "epoch": 0.41044776119402987,
      "grad_norm": 4.658736688116275,
      "learning_rate": 1.8268790948972938e-06,
      "loss": 1.0968,
      "step": 110
    },
    {
      "epoch": 0.4141791044776119,
      "grad_norm": 4.3724969896587,
      "learning_rate": 1.8235054920805651e-06,
      "loss": 1.3121,
      "step": 111
    },
    {
      "epoch": 0.417910447761194,
      "grad_norm": 1.9047640764173237,
      "learning_rate": 1.8201025132092886e-06,
      "loss": 0.966,
      "step": 112
    },
    {
      "epoch": 0.4216417910447761,
      "grad_norm": 2.6109096330803645,
      "learning_rate": 1.8166702796743888e-06,
      "loss": 0.9965,
      "step": 113
    },
    {
      "epoch": 0.4253731343283582,
      "grad_norm": 2.236001574200741,
      "learning_rate": 1.813208913910361e-06,
      "loss": 1.2068,
      "step": 114
    },
    {
      "epoch": 0.4291044776119403,
      "grad_norm": 3.5252250250259904,
      "learning_rate": 1.8097185393909047e-06,
      "loss": 0.9945,
      "step": 115
    },
    {
      "epoch": 0.43283582089552236,
      "grad_norm": 2.72651289344666,
      "learning_rate": 1.8061992806245183e-06,
      "loss": 1.1221,
      "step": 116
    },
    {
      "epoch": 0.43656716417910446,
      "grad_norm": 2.7580533850806663,
      "learning_rate": 1.802651263150058e-06,
      "loss": 1.1106,
      "step": 117
    },
    {
      "epoch": 0.44029850746268656,
      "grad_norm": 3.36730794119517,
      "learning_rate": 1.7990746135322592e-06,
      "loss": 1.3169,
      "step": 118
    },
    {
      "epoch": 0.44402985074626866,
      "grad_norm": 3.602491306889906,
      "learning_rate": 1.7954694593572225e-06,
      "loss": 1.2271,
      "step": 119
    },
    {
      "epoch": 0.44776119402985076,
      "grad_norm": 2.995624556867228,
      "learning_rate": 1.7918359292278611e-06,
      "loss": 1.4585,
      "step": 120
    },
    {
      "epoch": 0.45149253731343286,
      "grad_norm": 2.4713376740401394,
      "learning_rate": 1.7881741527593148e-06,
      "loss": 1.0635,
      "step": 121
    },
    {
      "epoch": 0.4552238805970149,
      "grad_norm": 2.9859360943624558,
      "learning_rate": 1.7844842605743255e-06,
      "loss": 1.1158,
      "step": 122
    },
    {
      "epoch": 0.458955223880597,
      "grad_norm": 2.236880197413377,
      "learning_rate": 1.7807663842985776e-06,
      "loss": 1.0568,
      "step": 123
    },
    {
      "epoch": 0.4626865671641791,
      "grad_norm": 2.812929367683673,
      "learning_rate": 1.777020656556003e-06,
      "loss": 0.9711,
      "step": 124
    },
    {
      "epoch": 0.4664179104477612,
      "grad_norm": 2.615520237147118,
      "learning_rate": 1.77324721096405e-06,
      "loss": 1.2155,
      "step": 125
    },
    {
      "epoch": 0.4701492537313433,
      "grad_norm": 2.56945954741141,
      "learning_rate": 1.7694461821289171e-06,
      "loss": 1.2214,
      "step": 126
    },
    {
      "epoch": 0.47388059701492535,
      "grad_norm": 2.6524624585109255,
      "learning_rate": 1.7656177056407504e-06,
      "loss": 1.0783,
      "step": 127
    },
    {
      "epoch": 0.47761194029850745,
      "grad_norm": 3.900106255085836,
      "learning_rate": 1.7617619180688084e-06,
      "loss": 1.1345,
      "step": 128
    },
    {
      "epoch": 0.48134328358208955,
      "grad_norm": 3.6445674759996973,
      "learning_rate": 1.7578789569565889e-06,
      "loss": 1.1407,
      "step": 129
    },
    {
      "epoch": 0.48507462686567165,
      "grad_norm": 3.2321962413724834,
      "learning_rate": 1.7539689608169236e-06,
      "loss": 1.2281,
      "step": 130
    },
    {
      "epoch": 0.48880597014925375,
      "grad_norm": 4.609891513693221,
      "learning_rate": 1.7500320691270363e-06,
      "loss": 1.2394,
      "step": 131
    },
    {
      "epoch": 0.4925373134328358,
      "grad_norm": 6.929277355854441,
      "learning_rate": 1.7460684223235678e-06,
      "loss": 1.233,
      "step": 132
    },
    {
      "epoch": 0.4962686567164179,
      "grad_norm": 3.231892093569866,
      "learning_rate": 1.7420781617975663e-06,
      "loss": 0.9962,
      "step": 133
    },
    {
      "epoch": 0.5,
      "grad_norm": 2.5090780090395115,
      "learning_rate": 1.738061429889444e-06,
      "loss": 0.9036,
      "step": 134
    },
    {
      "epoch": 0.503731343283582,
      "grad_norm": 4.328856707412604,
      "learning_rate": 1.734018369883898e-06,
      "loss": 1.1895,
      "step": 135
    },
    {
      "epoch": 0.5074626865671642,
      "grad_norm": 2.5691655391373875,
      "learning_rate": 1.7299491260048019e-06,
      "loss": 1.326,
      "step": 136
    },
    {
      "epoch": 0.5111940298507462,
      "grad_norm": 1.732025147621955,
      "learning_rate": 1.7258538434100576e-06,
      "loss": 1.2479,
      "step": 137
    },
    {
      "epoch": 0.5149253731343284,
      "grad_norm": 2.20121934912806,
      "learning_rate": 1.7217326681864206e-06,
      "loss": 1.0356,
      "step": 138
    },
    {
      "epoch": 0.5186567164179104,
      "grad_norm": 1.9985655432331606,
      "learning_rate": 1.717585747344286e-06,
      "loss": 1.1547,
      "step": 139
    },
    {
      "epoch": 0.5223880597014925,
      "grad_norm": 1.8929729658584291,
      "learning_rate": 1.7134132288124464e-06,
      "loss": 1.1972,
      "step": 140
    },
    {
      "epoch": 0.5261194029850746,
      "grad_norm": 2.5705526162651284,
      "learning_rate": 1.7092152614328136e-06,
      "loss": 0.9647,
      "step": 141
    },
    {
      "epoch": 0.5298507462686567,
      "grad_norm": 2.3300073188215134,
      "learning_rate": 1.7049919949551099e-06,
      "loss": 1.4177,
      "step": 142
    },
    {
      "epoch": 0.5335820895522388,
      "grad_norm": 2.919579845785974,
      "learning_rate": 1.7007435800315261e-06,
      "loss": 1.0245,
      "step": 143
    },
    {
      "epoch": 0.5373134328358209,
      "grad_norm": 2.58628020939173,
      "learning_rate": 1.6964701682113474e-06,
      "loss": 1.1438,
      "step": 144
    },
    {
      "epoch": 0.5410447761194029,
      "grad_norm": 2.1810582155175906,
      "learning_rate": 1.6921719119355466e-06,
      "loss": 1.1709,
      "step": 145
    },
    {
      "epoch": 0.5447761194029851,
      "grad_norm": 2.0256539029853036,
      "learning_rate": 1.687848964531348e-06,
      "loss": 1.2567,
      "step": 146
    },
    {
      "epoch": 0.5485074626865671,
      "grad_norm": 2.6789651782329003,
      "learning_rate": 1.6835014802067556e-06,
      "loss": 1.2105,
      "step": 147
    },
    {
      "epoch": 0.5522388059701493,
      "grad_norm": 2.2475712729751813,
      "learning_rate": 1.6791296140450543e-06,
      "loss": 1.0036,
      "step": 148
    },
    {
      "epoch": 0.5559701492537313,
      "grad_norm": 3.081758388528468,
      "learning_rate": 1.6747335219992774e-06,
      "loss": 1.229,
      "step": 149
    },
    {
      "epoch": 0.5597014925373134,
      "grad_norm": 3.4435580918281903,
      "learning_rate": 1.6703133608866414e-06,
      "loss": 1.2375,
      "step": 150
    },
    {
      "epoch": 0.5634328358208955,
      "grad_norm": 3.6488645320162263,
      "learning_rate": 1.6658692883829546e-06,
      "loss": 1.2528,
      "step": 151
    },
    {
      "epoch": 0.5671641791044776,
      "grad_norm": 2.6147378121358535,
      "learning_rate": 1.6614014630169915e-06,
      "loss": 1.0683,
      "step": 152
    },
    {
      "epoch": 0.5708955223880597,
      "grad_norm": 3.4412924263138502,
      "learning_rate": 1.6569100441648372e-06,
      "loss": 1.2073,
      "step": 153
    },
    {
      "epoch": 0.5746268656716418,
      "grad_norm": 3.8345977623754117,
      "learning_rate": 1.6523951920442032e-06,
      "loss": 1.1582,
      "step": 154
    },
    {
      "epoch": 0.5783582089552238,
      "grad_norm": 3.049354065878489,
      "learning_rate": 1.6478570677087116e-06,
      "loss": 1.26,
      "step": 155
    },
    {
      "epoch": 0.582089552238806,
      "grad_norm": 2.667157342873667,
      "learning_rate": 1.6432958330421497e-06,
      "loss": 1.1972,
      "step": 156
    },
    {
      "epoch": 0.585820895522388,
      "grad_norm": 2.3988481838806517,
      "learning_rate": 1.6387116507526955e-06,
      "loss": 1.0296,
      "step": 157
    },
    {
      "epoch": 0.5895522388059702,
      "grad_norm": 3.245331214881116,
      "learning_rate": 1.6341046843671142e-06,
      "loss": 1.0837,
      "step": 158
    },
    {
      "epoch": 0.5932835820895522,
      "grad_norm": 2.740295402410237,
      "learning_rate": 1.629475098224924e-06,
      "loss": 1.0756,
      "step": 159
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 3.8088940588573625,
      "learning_rate": 1.6248230574725338e-06,
      "loss": 1.2506,
      "step": 160
    },
    {
      "epoch": 0.6007462686567164,
      "grad_norm": 5.166361637828825,
      "learning_rate": 1.6201487280573533e-06,
      "loss": 0.9793,
      "step": 161
    },
    {
      "epoch": 0.6044776119402985,
      "grad_norm": 3.2415888531812485,
      "learning_rate": 1.6154522767218723e-06,
      "loss": 1.3401,
      "step": 162
    },
    {
      "epoch": 0.6082089552238806,
      "grad_norm": 3.305335197143126,
      "learning_rate": 1.6107338709977118e-06,
      "loss": 1.4258,
      "step": 163
    },
    {
      "epoch": 0.6119402985074627,
      "grad_norm": 3.074545865145304,
      "learning_rate": 1.6059936791996497e-06,
      "loss": 1.192,
      "step": 164
    },
    {
      "epoch": 0.6156716417910447,
      "grad_norm": 2.40059366672424,
      "learning_rate": 1.601231870419616e-06,
      "loss": 0.984,
      "step": 165
    },
    {
      "epoch": 0.6194029850746269,
      "grad_norm": 2.9844452713407197,
      "learning_rate": 1.596448614520661e-06,
      "loss": 1.1051,
      "step": 166
    },
    {
      "epoch": 0.6231343283582089,
      "grad_norm": 3.241493731745323,
      "learning_rate": 1.5916440821308947e-06,
      "loss": 1.1032,
      "step": 167
    },
    {
      "epoch": 0.6268656716417911,
      "grad_norm": 4.002012008083462,
      "learning_rate": 1.586818444637402e-06,
      "loss": 1.1281,
      "step": 168
    },
    {
      "epoch": 0.6305970149253731,
      "grad_norm": 2.891081597812952,
      "learning_rate": 1.5819718741801282e-06,
      "loss": 1.0984,
      "step": 169
    },
    {
      "epoch": 0.6343283582089553,
      "grad_norm": 2.510097661559307,
      "learning_rate": 1.577104543645738e-06,
      "loss": 0.9818,
      "step": 170
    },
    {
      "epoch": 0.6380597014925373,
      "grad_norm": 3.9519151526817784,
      "learning_rate": 1.5722166266614494e-06,
      "loss": 1.403,
      "step": 171
    },
    {
      "epoch": 0.6417910447761194,
      "grad_norm": 2.889629899144798,
      "learning_rate": 1.5673082975888386e-06,
      "loss": 1.4251,
      "step": 172
    },
    {
      "epoch": 0.6455223880597015,
      "grad_norm": 3.2843979337315203,
      "learning_rate": 1.5623797315176217e-06,
      "loss": 1.2102,
      "step": 173
    },
    {
      "epoch": 0.6492537313432836,
      "grad_norm": 3.851544142794571,
      "learning_rate": 1.5574311042594077e-06,
      "loss": 1.3174,
      "step": 174
    },
    {
      "epoch": 0.6529850746268657,
      "grad_norm": 3.0632504419966224,
      "learning_rate": 1.552462592341428e-06,
      "loss": 1.2578,
      "step": 175
    },
    {
      "epoch": 0.6567164179104478,
      "grad_norm": 2.9143363462552414,
      "learning_rate": 1.547474373000238e-06,
      "loss": 1.1117,
      "step": 176
    },
    {
      "epoch": 0.6604477611940298,
      "grad_norm": 3.33708665616015,
      "learning_rate": 1.5424666241753963e-06,
      "loss": 1.3296,
      "step": 177
    },
    {
      "epoch": 0.664179104477612,
      "grad_norm": 2.5174595420642767,
      "learning_rate": 1.5374395245031157e-06,
      "loss": 1.2501,
      "step": 178
    },
    {
      "epoch": 0.667910447761194,
      "grad_norm": 4.722876645619478,
      "learning_rate": 1.5323932533098924e-06,
      "loss": 0.8606,
      "step": 179
    },
    {
      "epoch": 0.6716417910447762,
      "grad_norm": 3.23675727446907,
      "learning_rate": 1.527327990606108e-06,
      "loss": 1.1848,
      "step": 180
    },
    {
      "epoch": 0.6753731343283582,
      "grad_norm": 3.2255476770575906,
      "learning_rate": 1.522243917079608e-06,
      "loss": 1.1501,
      "step": 181
    },
    {
      "epoch": 0.6791044776119403,
      "grad_norm": 2.660659180388112,
      "learning_rate": 1.5171412140892574e-06,
      "loss": 1.1792,
      "step": 182
    },
    {
      "epoch": 0.6828358208955224,
      "grad_norm": 2.5742735359754656,
      "learning_rate": 1.512020063658471e-06,
      "loss": 1.0524,
      "step": 183
    },
    {
      "epoch": 0.6865671641791045,
      "grad_norm": 2.7222921596819805,
      "learning_rate": 1.5068806484687188e-06,
      "loss": 0.9408,
      "step": 184
    },
    {
      "epoch": 0.6902985074626866,
      "grad_norm": 2.854241224431344,
      "learning_rate": 1.5017231518530115e-06,
      "loss": 1.1946,
      "step": 185
    },
    {
      "epoch": 0.6940298507462687,
      "grad_norm": 2.829758100829405,
      "learning_rate": 1.4965477577893596e-06,
      "loss": 1.0996,
      "step": 186
    },
    {
      "epoch": 0.6977611940298507,
      "grad_norm": 2.7341811827310907,
      "learning_rate": 1.4913546508942104e-06,
      "loss": 1.3112,
      "step": 187
    },
    {
      "epoch": 0.7014925373134329,
      "grad_norm": 2.727595416423421,
      "learning_rate": 1.486144016415862e-06,
      "loss": 0.8641,
      "step": 188
    },
    {
      "epoch": 0.7052238805970149,
      "grad_norm": 2.6779321037785957,
      "learning_rate": 1.4809160402278572e-06,
      "loss": 1.0673,
      "step": 189
    },
    {
      "epoch": 0.7089552238805971,
      "grad_norm": 2.105636468467112,
      "learning_rate": 1.4756709088223507e-06,
      "loss": 1.0804,
      "step": 190
    },
    {
      "epoch": 0.7126865671641791,
      "grad_norm": 2.308917007984876,
      "learning_rate": 1.470408809303457e-06,
      "loss": 1.0657,
      "step": 191
    },
    {
      "epoch": 0.7164179104477612,
      "grad_norm": 2.272233759263439,
      "learning_rate": 1.4651299293805772e-06,
      "loss": 0.97,
      "step": 192
    },
    {
      "epoch": 0.7201492537313433,
      "grad_norm": 2.38194076941112,
      "learning_rate": 1.459834457361702e-06,
      "loss": 1.1996,
      "step": 193
    },
    {
      "epoch": 0.7238805970149254,
      "grad_norm": 2.609236963602244,
      "learning_rate": 1.4545225821466949e-06,
      "loss": 1.4137,
      "step": 194
    },
    {
      "epoch": 0.7276119402985075,
      "grad_norm": 2.1583872582681303,
      "learning_rate": 1.449194493220553e-06,
      "loss": 1.21,
      "step": 195
    },
    {
      "epoch": 0.7313432835820896,
      "grad_norm": 2.0168668065761004,
      "learning_rate": 1.443850380646649e-06,
      "loss": 1.2648,
      "step": 196
    },
    {
      "epoch": 0.7350746268656716,
      "grad_norm": 2.8244668704260434,
      "learning_rate": 1.4384904350599496e-06,
      "loss": 1.158,
      "step": 197
    },
    {
      "epoch": 0.7388059701492538,
      "grad_norm": 2.154427501128158,
      "learning_rate": 1.433114847660217e-06,
      "loss": 1.1111,
      "step": 198
    },
    {
      "epoch": 0.7425373134328358,
      "grad_norm": 1.905058417754889,
      "learning_rate": 1.427723810205187e-06,
      "loss": 0.969,
      "step": 199
    },
    {
      "epoch": 0.746268656716418,
      "grad_norm": 2.739735762190122,
      "learning_rate": 1.4223175150037295e-06,
      "loss": 1.2142,
      "step": 200
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.4650290962226777,
      "learning_rate": 1.4168961549089872e-06,
      "loss": 1.1373,
      "step": 201
    },
    {
      "epoch": 0.753731343283582,
      "grad_norm": 2.5869478423809786,
      "learning_rate": 1.4114599233114986e-06,
      "loss": 1.3506,
      "step": 202
    },
    {
      "epoch": 0.7574626865671642,
      "grad_norm": 3.1980963820842483,
      "learning_rate": 1.4060090141322966e-06,
      "loss": 1.0384,
      "step": 203
    },
    {
      "epoch": 0.7611940298507462,
      "grad_norm": 2.5362305958432443,
      "learning_rate": 1.4005436218159925e-06,
      "loss": 1.1983,
      "step": 204
    },
    {
      "epoch": 0.7649253731343284,
      "grad_norm": 1.7669955812420282,
      "learning_rate": 1.3950639413238393e-06,
      "loss": 1.1922,
      "step": 205
    },
    {
      "epoch": 0.7686567164179104,
      "grad_norm": 3.236818206550707,
      "learning_rate": 1.3895701681267782e-06,
      "loss": 1.1532,
      "step": 206
    },
    {
      "epoch": 0.7723880597014925,
      "grad_norm": 3.1410703998345917,
      "learning_rate": 1.384062498198464e-06,
      "loss": 1.2707,
      "step": 207
    },
    {
      "epoch": 0.7761194029850746,
      "grad_norm": 2.947726795909021,
      "learning_rate": 1.3785411280082746e-06,
      "loss": 1.1552,
      "step": 208
    },
    {
      "epoch": 0.7798507462686567,
      "grad_norm": 4.158405889593859,
      "learning_rate": 1.373006254514304e-06,
      "loss": 1.1323,
      "step": 209
    },
    {
      "epoch": 0.7835820895522388,
      "grad_norm": 3.6596410080845483,
      "learning_rate": 1.3674580751563357e-06,
      "loss": 1.1021,
      "step": 210
    },
    {
      "epoch": 0.7873134328358209,
      "grad_norm": 3.4837568397902063,
      "learning_rate": 1.361896787848798e-06,
      "loss": 1.1507,
      "step": 211
    },
    {
      "epoch": 0.7910447761194029,
      "grad_norm": 5.190434900700764,
      "learning_rate": 1.3563225909737074e-06,
      "loss": 1.1307,
      "step": 212
    },
    {
      "epoch": 0.7947761194029851,
      "grad_norm": 3.193649918972427,
      "learning_rate": 1.3507356833735885e-06,
      "loss": 1.1674,
      "step": 213
    },
    {
      "epoch": 0.7985074626865671,
      "grad_norm": 3.64309739990448,
      "learning_rate": 1.3451362643443831e-06,
      "loss": 1.1026,
      "step": 214
    },
    {
      "epoch": 0.8022388059701493,
      "grad_norm": 4.480821519285648,
      "learning_rate": 1.3395245336283396e-06,
      "loss": 1.1305,
      "step": 215
    },
    {
      "epoch": 0.8059701492537313,
      "grad_norm": 2.485764813025922,
      "learning_rate": 1.333900691406889e-06,
      "loss": 1.0909,
      "step": 216
    },
    {
      "epoch": 0.8097014925373134,
      "grad_norm": 2.8276534151044417,
      "learning_rate": 1.3282649382935028e-06,
      "loss": 1.2906,
      "step": 217
    },
    {
      "epoch": 0.8134328358208955,
      "grad_norm": 2.661022282944918,
      "learning_rate": 1.322617475326538e-06,
      "loss": 1.0923,
      "step": 218
    },
    {
      "epoch": 0.8171641791044776,
      "grad_norm": 2.6551254805947053,
      "learning_rate": 1.316958503962065e-06,
      "loss": 1.1648,
      "step": 219
    },
    {
      "epoch": 0.8208955223880597,
      "grad_norm": 2.3353396983390486,
      "learning_rate": 1.3112882260666805e-06,
      "loss": 1.2479,
      "step": 220
    },
    {
      "epoch": 0.8246268656716418,
      "grad_norm": 1.8853847357875915,
      "learning_rate": 1.3056068439103082e-06,
      "loss": 0.9367,
      "step": 221
    },
    {
      "epoch": 0.8283582089552238,
      "grad_norm": 1.7789270126386558,
      "learning_rate": 1.299914560158982e-06,
      "loss": 0.9866,
      "step": 222
    },
    {
      "epoch": 0.832089552238806,
      "grad_norm": 4.437767240695352,
      "learning_rate": 1.2942115778676175e-06,
      "loss": 1.0143,
      "step": 223
    },
    {
      "epoch": 0.835820895522388,
      "grad_norm": 2.643730633752304,
      "learning_rate": 1.2884981004727675e-06,
      "loss": 1.1737,
      "step": 224
    },
    {
      "epoch": 0.8395522388059702,
      "grad_norm": 4.113252275049106,
      "learning_rate": 1.2827743317853666e-06,
      "loss": 1.278,
      "step": 225
    },
    {
      "epoch": 0.8432835820895522,
      "grad_norm": 4.473452632975873,
      "learning_rate": 1.2770404759834592e-06,
      "loss": 1.2337,
      "step": 226
    },
    {
      "epoch": 0.8470149253731343,
      "grad_norm": 3.3121627389468227,
      "learning_rate": 1.2712967376049176e-06,
      "loss": 0.9808,
      "step": 227
    },
    {
      "epoch": 0.8507462686567164,
      "grad_norm": 2.765455947896225,
      "learning_rate": 1.2655433215401437e-06,
      "loss": 0.809,
      "step": 228
    },
    {
      "epoch": 0.8544776119402985,
      "grad_norm": 5.806520585625066,
      "learning_rate": 1.2597804330247629e-06,
      "loss": 1.3475,
      "step": 229
    },
    {
      "epoch": 0.8582089552238806,
      "grad_norm": 4.3730223037366365,
      "learning_rate": 1.2540082776323006e-06,
      "loss": 1.0836,
      "step": 230
    },
    {
      "epoch": 0.8619402985074627,
      "grad_norm": 2.5075803170353987,
      "learning_rate": 1.2482270612668507e-06,
      "loss": 1.1071,
      "step": 231
    },
    {
      "epoch": 0.8656716417910447,
      "grad_norm": 3.845367887472252,
      "learning_rate": 1.242436990155728e-06,
      "loss": 1.249,
      "step": 232
    },
    {
      "epoch": 0.8694029850746269,
      "grad_norm": 3.2664015113912237,
      "learning_rate": 1.2366382708421154e-06,
      "loss": 1.1988,
      "step": 233
    },
    {
      "epoch": 0.8731343283582089,
      "grad_norm": 3.9034686201589586,
      "learning_rate": 1.2308311101776932e-06,
      "loss": 1.2718,
      "step": 234
    },
    {
      "epoch": 0.8768656716417911,
      "grad_norm": 2.1523180105846844,
      "learning_rate": 1.2250157153152609e-06,
      "loss": 1.1845,
      "step": 235
    },
    {
      "epoch": 0.8805970149253731,
      "grad_norm": 2.8559179892421116,
      "learning_rate": 1.2191922937013488e-06,
      "loss": 1.2277,
      "step": 236
    },
    {
      "epoch": 0.8843283582089553,
      "grad_norm": 2.4964069518984697,
      "learning_rate": 1.2133610530688167e-06,
      "loss": 1.1304,
      "step": 237
    },
    {
      "epoch": 0.8880597014925373,
      "grad_norm": 1.6510558048415136,
      "learning_rate": 1.2075222014294447e-06,
      "loss": 1.0716,
      "step": 238
    },
    {
      "epoch": 0.8917910447761194,
      "grad_norm": 4.138846396996276,
      "learning_rate": 1.2016759470665109e-06,
      "loss": 1.1715,
      "step": 239
    },
    {
      "epoch": 0.8955223880597015,
      "grad_norm": 2.591000009602829,
      "learning_rate": 1.1958224985273645e-06,
      "loss": 1.2082,
      "step": 240
    },
    {
      "epoch": 0.8992537313432836,
      "grad_norm": 1.6656445235525534,
      "learning_rate": 1.1899620646159853e-06,
      "loss": 1.057,
      "step": 241
    },
    {
      "epoch": 0.9029850746268657,
      "grad_norm": 3.4344302308874486,
      "learning_rate": 1.1840948543855334e-06,
      "loss": 0.9381,
      "step": 242
    },
    {
      "epoch": 0.9067164179104478,
      "grad_norm": 2.5448689987449384,
      "learning_rate": 1.1782210771308947e-06,
      "loss": 1.1778,
      "step": 243
    },
    {
      "epoch": 0.9104477611940298,
      "grad_norm": 2.2000538592782433,
      "learning_rate": 1.1723409423812134e-06,
      "loss": 1.1269,
      "step": 244
    },
    {
      "epoch": 0.914179104477612,
      "grad_norm": 1.6886830906655088,
      "learning_rate": 1.1664546598924184e-06,
      "loss": 1.1615,
      "step": 245
    },
    {
      "epoch": 0.917910447761194,
      "grad_norm": 2.2494221352588886,
      "learning_rate": 1.1605624396397398e-06,
      "loss": 1.4029,
      "step": 246
    },
    {
      "epoch": 0.9216417910447762,
      "grad_norm": 2.012712883705275,
      "learning_rate": 1.1546644918102196e-06,
      "loss": 1.1799,
      "step": 247
    },
    {
      "epoch": 0.9253731343283582,
      "grad_norm": 2.3518817671490586,
      "learning_rate": 1.1487610267952142e-06,
      "loss": 1.1566,
      "step": 248
    },
    {
      "epoch": 0.9291044776119403,
      "grad_norm": 2.0646756101710593,
      "learning_rate": 1.1428522551828882e-06,
      "loss": 1.2883,
      "step": 249
    },
    {
      "epoch": 0.9328358208955224,
      "grad_norm": 1.812081401132651,
      "learning_rate": 1.1369383877507034e-06,
      "loss": 1.2653,
      "step": 250
    },
    {
      "epoch": 0.9365671641791045,
      "grad_norm": 2.242364078092567,
      "learning_rate": 1.131019635457899e-06,
      "loss": 1.1829,
      "step": 251
    },
    {
      "epoch": 0.9402985074626866,
      "grad_norm": 2.0182289267611258,
      "learning_rate": 1.1250962094379668e-06,
      "loss": 0.9778,
      "step": 252
    },
    {
      "epoch": 0.9440298507462687,
      "grad_norm": 2.4797725291235593,
      "learning_rate": 1.1191683209911201e-06,
      "loss": 1.0714,
      "step": 253
    },
    {
      "epoch": 0.9477611940298507,
      "grad_norm": 2.519766746322205,
      "learning_rate": 1.1132361815767552e-06,
      "loss": 1.2406,
      "step": 254
    },
    {
      "epoch": 0.9514925373134329,
      "grad_norm": 2.063185346641498,
      "learning_rate": 1.1073000028059095e-06,
      "loss": 0.987,
      "step": 255
    },
    {
      "epoch": 0.9552238805970149,
      "grad_norm": 1.5333052526002764,
      "learning_rate": 1.1013599964337106e-06,
      "loss": 0.8951,
      "step": 256
    },
    {
      "epoch": 0.9589552238805971,
      "grad_norm": 4.771307269529906,
      "learning_rate": 1.095416374351826e-06,
      "loss": 1.2666,
      "step": 257
    },
    {
      "epoch": 0.9626865671641791,
      "grad_norm": 2.717541175438304,
      "learning_rate": 1.0894693485809014e-06,
      "loss": 1.1109,
      "step": 258
    },
    {
      "epoch": 0.9664179104477612,
      "grad_norm": 2.756698383274168,
      "learning_rate": 1.0835191312629992e-06,
      "loss": 1.129,
      "step": 259
    },
    {
      "epoch": 0.9701492537313433,
      "grad_norm": 1.7033771070854546,
      "learning_rate": 1.0775659346540303e-06,
      "loss": 0.9603,
      "step": 260
    },
    {
      "epoch": 0.9738805970149254,
      "grad_norm": 2.5967915673434034,
      "learning_rate": 1.0716099711161832e-06,
      "loss": 1.1943,
      "step": 261
    },
    {
      "epoch": 0.9776119402985075,
      "grad_norm": 2.145466598370863,
      "learning_rate": 1.0656514531103483e-06,
      "loss": 0.8841,
      "step": 262
    },
    {
      "epoch": 0.9813432835820896,
      "grad_norm": 2.1800126434020477,
      "learning_rate": 1.0596905931885373e-06,
      "loss": 0.9661,
      "step": 263
    },
    {
      "epoch": 0.9850746268656716,
      "grad_norm": 2.8261681835210544,
      "learning_rate": 1.0537276039863047e-06,
      "loss": 1.1867,
      "step": 264
    },
    {
      "epoch": 0.9888059701492538,
      "grad_norm": 3.2978247537112586,
      "learning_rate": 1.04776269821516e-06,
      "loss": 1.2103,
      "step": 265
    },
    {
      "epoch": 0.9925373134328358,
      "grad_norm": 2.8052176780437064,
      "learning_rate": 1.0417960886549798e-06,
      "loss": 1.3141,
      "step": 266
    },
    {
      "epoch": 0.996268656716418,
      "grad_norm": 2.5933472539580635,
      "learning_rate": 1.035827988146418e-06,
      "loss": 1.078,
      "step": 267
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.8520830177001395,
      "learning_rate": 1.0298586095833151e-06,
      "loss": 1.3273,
      "step": 268
    }
  ],
  "logging_steps": 1,
  "max_steps": 536,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 268,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 50952048476160.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}