|
{ |
|
"best_metric": 11.5, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-300", |
|
"epoch": 0.16387286197125397, |
|
"eval_steps": 300, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0001365607183093783, |
|
"grad_norm": 9.676669833424967e-06, |
|
"learning_rate": 2e-05, |
|
"loss": 46.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0001365607183093783, |
|
"eval_loss": 11.5, |
|
"eval_runtime": 20.0819, |
|
"eval_samples_per_second": 153.571, |
|
"eval_steps_per_second": 76.786, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0002731214366187566, |
|
"grad_norm": 8.113798685371876e-06, |
|
"learning_rate": 4e-05, |
|
"loss": 46.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0004096821549281349, |
|
"grad_norm": 4.353820713731693e-06, |
|
"learning_rate": 6e-05, |
|
"loss": 46.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0005462428732375132, |
|
"grad_norm": 5.62772765988484e-06, |
|
"learning_rate": 8e-05, |
|
"loss": 46.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0006828035915468915, |
|
"grad_norm": 6.43259363641846e-06, |
|
"learning_rate": 0.0001, |
|
"loss": 46.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0008193643098562698, |
|
"grad_norm": 9.933234650816303e-06, |
|
"learning_rate": 0.00012, |
|
"loss": 46.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0009559250281656481, |
|
"grad_norm": 6.729349024681142e-06, |
|
"learning_rate": 0.00014, |
|
"loss": 46.0, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0010924857464750264, |
|
"grad_norm": 7.317645668081241e-06, |
|
"learning_rate": 0.00016, |
|
"loss": 46.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0012290464647844047, |
|
"grad_norm": 7.1731265052221715e-06, |
|
"learning_rate": 0.00018, |
|
"loss": 46.0, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.001365607183093783, |
|
"grad_norm": 7.621179065608885e-06, |
|
"learning_rate": 0.0002, |
|
"loss": 46.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0015021679014031613, |
|
"grad_norm": 1.141078882938018e-05, |
|
"learning_rate": 0.00019999994480149276, |
|
"loss": 46.0, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0016387286197125396, |
|
"grad_norm": 9.140413567365613e-06, |
|
"learning_rate": 0.00019999977920603197, |
|
"loss": 46.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.001775289338021918, |
|
"grad_norm": 1.240484562003985e-05, |
|
"learning_rate": 0.0001999995032138004, |
|
"loss": 46.0, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0019118500563312963, |
|
"grad_norm": 1.1790569260483608e-05, |
|
"learning_rate": 0.00019999911682510278, |
|
"loss": 46.0, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0020484107746406746, |
|
"grad_norm": 1.051307117450051e-05, |
|
"learning_rate": 0.00019999862004036568, |
|
"loss": 46.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0021849714929500527, |
|
"grad_norm": 9.398099791724235e-06, |
|
"learning_rate": 0.0001999980128601375, |
|
"loss": 46.0, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0023215322112594312, |
|
"grad_norm": 7.582193120470038e-06, |
|
"learning_rate": 0.00019999729528508855, |
|
"loss": 46.0, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0024580929295688093, |
|
"grad_norm": 1.940154834301211e-05, |
|
"learning_rate": 0.00019999646731601103, |
|
"loss": 46.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.002594653647878188, |
|
"grad_norm": 7.919963536551222e-06, |
|
"learning_rate": 0.00019999552895381902, |
|
"loss": 46.0, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.002731214366187566, |
|
"grad_norm": 1.3096555449010339e-05, |
|
"learning_rate": 0.0001999944801995484, |
|
"loss": 46.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0028677750844969445, |
|
"grad_norm": 1.2133096788602415e-05, |
|
"learning_rate": 0.00019999332105435696, |
|
"loss": 46.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0030043358028063226, |
|
"grad_norm": 3.7125832022866234e-05, |
|
"learning_rate": 0.00019999205151952437, |
|
"loss": 46.0, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.003140896521115701, |
|
"grad_norm": 1.1414869732107036e-05, |
|
"learning_rate": 0.0001999906715964522, |
|
"loss": 46.0, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0032774572394250793, |
|
"grad_norm": 1.2319793313508853e-05, |
|
"learning_rate": 0.0001999891812866638, |
|
"loss": 46.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.003414017957734458, |
|
"grad_norm": 1.1083939170930535e-05, |
|
"learning_rate": 0.00019998758059180447, |
|
"loss": 46.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.003550578676043836, |
|
"grad_norm": 1.0406022738607135e-05, |
|
"learning_rate": 0.00019998586951364125, |
|
"loss": 46.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0036871393943532144, |
|
"grad_norm": 8.916207661968656e-06, |
|
"learning_rate": 0.0001999840480540632, |
|
"loss": 46.0, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0038237001126625926, |
|
"grad_norm": 8.375522156711668e-06, |
|
"learning_rate": 0.0001999821162150811, |
|
"loss": 46.0, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.003960260830971971, |
|
"grad_norm": 5.647367288474925e-06, |
|
"learning_rate": 0.00019998007399882765, |
|
"loss": 46.0, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.004096821549281349, |
|
"grad_norm": 2.2721666027791798e-05, |
|
"learning_rate": 0.00019997792140755746, |
|
"loss": 46.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.004233382267590727, |
|
"grad_norm": 1.8854540030588396e-05, |
|
"learning_rate": 0.00019997565844364688, |
|
"loss": 46.0, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.004369942985900105, |
|
"grad_norm": 8.220870768127497e-06, |
|
"learning_rate": 0.00019997328510959413, |
|
"loss": 46.0, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.004506503704209484, |
|
"grad_norm": 7.226680736494018e-06, |
|
"learning_rate": 0.00019997080140801932, |
|
"loss": 46.0, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0046430644225188625, |
|
"grad_norm": 4.141399131185608e-06, |
|
"learning_rate": 0.0001999682073416644, |
|
"loss": 46.0, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.004779625140828241, |
|
"grad_norm": 1.651368074817583e-05, |
|
"learning_rate": 0.00019996550291339311, |
|
"loss": 46.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.004916185859137619, |
|
"grad_norm": 2.9052245736238547e-05, |
|
"learning_rate": 0.00019996268812619107, |
|
"loss": 46.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.005052746577446998, |
|
"grad_norm": 9.355511792819016e-06, |
|
"learning_rate": 0.00019995976298316576, |
|
"loss": 46.0, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.005189307295756376, |
|
"grad_norm": 1.852245804911945e-05, |
|
"learning_rate": 0.00019995672748754638, |
|
"loss": 46.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.005325868014065754, |
|
"grad_norm": 2.2482845452032052e-05, |
|
"learning_rate": 0.0001999535816426841, |
|
"loss": 46.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.005462428732375132, |
|
"grad_norm": 4.135522249271162e-05, |
|
"learning_rate": 0.0001999503254520518, |
|
"loss": 46.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.005598989450684511, |
|
"grad_norm": 4.387225635582581e-05, |
|
"learning_rate": 0.0001999469589192442, |
|
"loss": 46.0, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.005735550168993889, |
|
"grad_norm": 1.1143504707433749e-05, |
|
"learning_rate": 0.00019994348204797788, |
|
"loss": 46.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.005872110887303267, |
|
"grad_norm": 1.7061322068911977e-05, |
|
"learning_rate": 0.00019993989484209118, |
|
"loss": 46.0, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.006008671605612645, |
|
"grad_norm": 3.7752193748019636e-05, |
|
"learning_rate": 0.0001999361973055443, |
|
"loss": 46.0, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.006145232323922024, |
|
"grad_norm": 2.867165494535584e-05, |
|
"learning_rate": 0.0001999323894424192, |
|
"loss": 46.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.006281793042231402, |
|
"grad_norm": 1.418732153979363e-05, |
|
"learning_rate": 0.0001999284712569196, |
|
"loss": 46.0, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.00641835376054078, |
|
"grad_norm": 5.592720117419958e-05, |
|
"learning_rate": 0.00019992444275337114, |
|
"loss": 46.0, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0065549144788501585, |
|
"grad_norm": 7.95181404100731e-05, |
|
"learning_rate": 0.0001999203039362211, |
|
"loss": 46.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.006691475197159537, |
|
"grad_norm": 2.4339618903468363e-05, |
|
"learning_rate": 0.00019991605481003866, |
|
"loss": 46.0, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.006828035915468916, |
|
"grad_norm": 3.441837543505244e-05, |
|
"learning_rate": 0.00019991169537951468, |
|
"loss": 46.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.006964596633778294, |
|
"grad_norm": 1.7208472854690626e-05, |
|
"learning_rate": 0.0001999072256494619, |
|
"loss": 46.0, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.007101157352087672, |
|
"grad_norm": 1.2097309991077054e-05, |
|
"learning_rate": 0.00019990264562481472, |
|
"loss": 46.0, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.00723771807039705, |
|
"grad_norm": 1.3584556654677726e-05, |
|
"learning_rate": 0.00019989795531062936, |
|
"loss": 46.0, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.007374278788706429, |
|
"grad_norm": 2.912199306592811e-05, |
|
"learning_rate": 0.00019989315471208378, |
|
"loss": 46.0, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.007510839507015807, |
|
"grad_norm": 2.0452795070013963e-05, |
|
"learning_rate": 0.00019988824383447776, |
|
"loss": 46.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.007647400225325185, |
|
"grad_norm": 1.590143256180454e-05, |
|
"learning_rate": 0.00019988322268323268, |
|
"loss": 46.0, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.007783960943634563, |
|
"grad_norm": 1.094182789529441e-05, |
|
"learning_rate": 0.00019987809126389177, |
|
"loss": 46.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.007920521661943942, |
|
"grad_norm": 1.5657904441468418e-05, |
|
"learning_rate": 0.00019987284958211996, |
|
"loss": 46.0, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.00805708238025332, |
|
"grad_norm": 1.1172323866048828e-05, |
|
"learning_rate": 0.00019986749764370392, |
|
"loss": 46.0, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.008193643098562698, |
|
"grad_norm": 1.1931785593333188e-05, |
|
"learning_rate": 0.00019986203545455203, |
|
"loss": 46.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.008330203816872076, |
|
"grad_norm": 2.3814011001377366e-05, |
|
"learning_rate": 0.0001998564630206944, |
|
"loss": 46.0, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.008466764535181455, |
|
"grad_norm": 1.1059896678489167e-05, |
|
"learning_rate": 0.0001998507803482828, |
|
"loss": 46.0, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.008603325253490833, |
|
"grad_norm": 1.2055512343067676e-05, |
|
"learning_rate": 0.00019984498744359075, |
|
"loss": 46.0, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.00873988597180021, |
|
"grad_norm": 3.0810657335678115e-05, |
|
"learning_rate": 0.00019983908431301343, |
|
"loss": 46.0, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.00887644669010959, |
|
"grad_norm": 2.3847031116019934e-05, |
|
"learning_rate": 0.0001998330709630677, |
|
"loss": 46.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.009013007408418969, |
|
"grad_norm": 2.5676057703094557e-05, |
|
"learning_rate": 0.0001998269474003922, |
|
"loss": 46.0, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.009149568126728347, |
|
"grad_norm": 1.9099008568446152e-05, |
|
"learning_rate": 0.0001998207136317471, |
|
"loss": 46.0, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.009286128845037725, |
|
"grad_norm": 1.620809234736953e-05, |
|
"learning_rate": 0.00019981436966401425, |
|
"loss": 46.0, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.009422689563347103, |
|
"grad_norm": 2.771842628135346e-05, |
|
"learning_rate": 0.00019980791550419728, |
|
"loss": 46.0, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.009559250281656481, |
|
"grad_norm": 1.6155812772922218e-05, |
|
"learning_rate": 0.00019980135115942136, |
|
"loss": 46.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.00969581099996586, |
|
"grad_norm": 1.4296605513663962e-05, |
|
"learning_rate": 0.00019979467663693332, |
|
"loss": 46.0, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.009832371718275237, |
|
"grad_norm": 2.5244138669222593e-05, |
|
"learning_rate": 0.00019978789194410168, |
|
"loss": 46.0, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.009968932436584617, |
|
"grad_norm": 2.8768767151632346e-05, |
|
"learning_rate": 0.00019978099708841646, |
|
"loss": 46.0, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.010105493154893995, |
|
"grad_norm": 2.3254493498825468e-05, |
|
"learning_rate": 0.00019977399207748943, |
|
"loss": 46.0, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.010242053873203373, |
|
"grad_norm": 2.780866634566337e-05, |
|
"learning_rate": 0.00019976687691905393, |
|
"loss": 46.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.010378614591512752, |
|
"grad_norm": 9.848828085523564e-06, |
|
"learning_rate": 0.00019975965162096485, |
|
"loss": 46.0, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.01051517530982213, |
|
"grad_norm": 1.7771888451534323e-05, |
|
"learning_rate": 0.00019975231619119867, |
|
"loss": 46.0, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.010651736028131508, |
|
"grad_norm": 2.449357634759508e-05, |
|
"learning_rate": 0.00019974487063785355, |
|
"loss": 46.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.010788296746440886, |
|
"grad_norm": 2.3659646103624254e-05, |
|
"learning_rate": 0.00019973731496914914, |
|
"loss": 46.0, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.010924857464750264, |
|
"grad_norm": 4.919664206681773e-05, |
|
"learning_rate": 0.00019972964919342663, |
|
"loss": 46.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.011061418183059642, |
|
"grad_norm": 3.88891676266212e-05, |
|
"learning_rate": 0.00019972187331914886, |
|
"loss": 46.0, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.011197978901369022, |
|
"grad_norm": 5.824596883030608e-05, |
|
"learning_rate": 0.00019971398735490014, |
|
"loss": 46.0, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0113345396196784, |
|
"grad_norm": 2.443828088871669e-05, |
|
"learning_rate": 0.00019970599130938633, |
|
"loss": 46.0, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.011471100337987778, |
|
"grad_norm": 1.806908221624326e-05, |
|
"learning_rate": 0.0001996978851914349, |
|
"loss": 46.0, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.011607661056297156, |
|
"grad_norm": 1.9875111320288852e-05, |
|
"learning_rate": 0.00019968966900999464, |
|
"loss": 46.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.011744221774606534, |
|
"grad_norm": 2.811396734614391e-05, |
|
"learning_rate": 0.00019968134277413606, |
|
"loss": 46.0, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.011880782492915912, |
|
"grad_norm": 2.5689738322398625e-05, |
|
"learning_rate": 0.00019967290649305103, |
|
"loss": 46.0, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.01201734321122529, |
|
"grad_norm": 4.2773946915986016e-05, |
|
"learning_rate": 0.00019966436017605297, |
|
"loss": 46.0, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.012153903929534669, |
|
"grad_norm": 0.00021493734675459564, |
|
"learning_rate": 0.00019965570383257677, |
|
"loss": 46.0, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.012290464647844048, |
|
"grad_norm": 0.00024037304683588445, |
|
"learning_rate": 0.00019964693747217874, |
|
"loss": 46.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.012427025366153427, |
|
"grad_norm": 4.650273695006035e-05, |
|
"learning_rate": 0.00019963806110453672, |
|
"loss": 46.0, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.012563586084462805, |
|
"grad_norm": 2.4313370886375196e-05, |
|
"learning_rate": 0.00019962907473944995, |
|
"loss": 46.0, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.012700146802772183, |
|
"grad_norm": 1.7518057575216517e-05, |
|
"learning_rate": 0.00019961997838683905, |
|
"loss": 46.0, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.01283670752108156, |
|
"grad_norm": 3.366146847838536e-05, |
|
"learning_rate": 0.00019961077205674622, |
|
"loss": 46.0, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.012973268239390939, |
|
"grad_norm": 4.310454460210167e-05, |
|
"learning_rate": 0.00019960145575933486, |
|
"loss": 46.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.013109828957700317, |
|
"grad_norm": 4.665861342800781e-05, |
|
"learning_rate": 0.00019959202950489, |
|
"loss": 46.0, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.013246389676009695, |
|
"grad_norm": 2.579794090706855e-05, |
|
"learning_rate": 0.00019958249330381787, |
|
"loss": 46.0, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.013382950394319073, |
|
"grad_norm": 0.00010568476136540994, |
|
"learning_rate": 0.00019957284716664618, |
|
"loss": 46.0, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.013519511112628453, |
|
"grad_norm": 3.9795751945348457e-05, |
|
"learning_rate": 0.00019956309110402397, |
|
"loss": 46.0, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.013656071830937831, |
|
"grad_norm": 2.9642118533956818e-05, |
|
"learning_rate": 0.00019955322512672162, |
|
"loss": 46.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01379263254924721, |
|
"grad_norm": 1.9580385924200527e-05, |
|
"learning_rate": 0.00019954324924563089, |
|
"loss": 46.0, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.013929193267556587, |
|
"grad_norm": 3.410055433050729e-05, |
|
"learning_rate": 0.00019953316347176488, |
|
"loss": 46.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.014065753985865966, |
|
"grad_norm": 5.128211705596186e-05, |
|
"learning_rate": 0.00019952296781625795, |
|
"loss": 46.0, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.014202314704175344, |
|
"grad_norm": 3.538643795764074e-05, |
|
"learning_rate": 0.0001995126622903658, |
|
"loss": 46.0, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.014338875422484722, |
|
"grad_norm": 2.4323409888893366e-05, |
|
"learning_rate": 0.00019950224690546545, |
|
"loss": 46.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.0144754361407941, |
|
"grad_norm": 3.726944123627618e-05, |
|
"learning_rate": 0.00019949172167305516, |
|
"loss": 46.0, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.01461199685910348, |
|
"grad_norm": 3.0351222449098714e-05, |
|
"learning_rate": 0.00019948108660475445, |
|
"loss": 46.0, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.014748557577412858, |
|
"grad_norm": 3.8205947930691764e-05, |
|
"learning_rate": 0.0001994703417123042, |
|
"loss": 46.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.014885118295722236, |
|
"grad_norm": 3.468756040092558e-05, |
|
"learning_rate": 0.00019945948700756633, |
|
"loss": 46.0, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.015021679014031614, |
|
"grad_norm": 2.4972347091534175e-05, |
|
"learning_rate": 0.00019944852250252418, |
|
"loss": 46.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.015158239732340992, |
|
"grad_norm": 2.8265107175684534e-05, |
|
"learning_rate": 0.00019943744820928222, |
|
"loss": 46.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.01529480045065037, |
|
"grad_norm": 2.0429773940122686e-05, |
|
"learning_rate": 0.00019942626414006615, |
|
"loss": 46.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.015431361168959748, |
|
"grad_norm": 3.450764052104205e-05, |
|
"learning_rate": 0.00019941497030722286, |
|
"loss": 46.0, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.015567921887269126, |
|
"grad_norm": 4.498309499467723e-05, |
|
"learning_rate": 0.00019940356672322037, |
|
"loss": 46.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.015704482605578506, |
|
"grad_norm": 2.8786233087885194e-05, |
|
"learning_rate": 0.00019939205340064792, |
|
"loss": 46.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.015841043323887884, |
|
"grad_norm": 3.189581184415147e-05, |
|
"learning_rate": 0.00019938043035221586, |
|
"loss": 46.0, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.015977604042197262, |
|
"grad_norm": 4.335124685894698e-05, |
|
"learning_rate": 0.0001993686975907557, |
|
"loss": 46.0, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.01611416476050664, |
|
"grad_norm": 4.708675987785682e-05, |
|
"learning_rate": 0.00019935685512922007, |
|
"loss": 46.0, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.01625072547881602, |
|
"grad_norm": 3.432563244132325e-05, |
|
"learning_rate": 0.00019934490298068264, |
|
"loss": 46.0, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.016387286197125397, |
|
"grad_norm": 4.133255788474344e-05, |
|
"learning_rate": 0.0001993328411583383, |
|
"loss": 46.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.016523846915434775, |
|
"grad_norm": 2.959890480269678e-05, |
|
"learning_rate": 0.00019932066967550289, |
|
"loss": 46.0, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.016660407633744153, |
|
"grad_norm": 5.350433275452815e-05, |
|
"learning_rate": 0.0001993083885456134, |
|
"loss": 46.0, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.01679696835205353, |
|
"grad_norm": 2.8684948119916953e-05, |
|
"learning_rate": 0.0001992959977822278, |
|
"loss": 46.0, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.01693352907036291, |
|
"grad_norm": 4.703548984252848e-05, |
|
"learning_rate": 0.0001992834973990251, |
|
"loss": 46.0, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.017070089788672287, |
|
"grad_norm": 2.6726818759925663e-05, |
|
"learning_rate": 0.0001992708874098054, |
|
"loss": 46.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.017206650506981665, |
|
"grad_norm": 3.8927741115912795e-05, |
|
"learning_rate": 0.00019925816782848975, |
|
"loss": 46.0, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.017343211225291044, |
|
"grad_norm": 8.123937004711479e-05, |
|
"learning_rate": 0.00019924533866912017, |
|
"loss": 46.0, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.01747977194360042, |
|
"grad_norm": 4.269002965884283e-05, |
|
"learning_rate": 0.00019923239994585967, |
|
"loss": 46.0, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.017616332661909803, |
|
"grad_norm": 6.259434303501621e-05, |
|
"learning_rate": 0.0001992193516729922, |
|
"loss": 46.0, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.01775289338021918, |
|
"grad_norm": 5.1137911214027554e-05, |
|
"learning_rate": 0.0001992061938649227, |
|
"loss": 46.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01788945409852856, |
|
"grad_norm": 8.54436366353184e-05, |
|
"learning_rate": 0.00019919292653617694, |
|
"loss": 46.0, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.018026014816837937, |
|
"grad_norm": 3.590865890146233e-05, |
|
"learning_rate": 0.00019917954970140173, |
|
"loss": 46.0, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.018162575535147316, |
|
"grad_norm": 3.2492869650013745e-05, |
|
"learning_rate": 0.00019916606337536466, |
|
"loss": 46.0, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.018299136253456694, |
|
"grad_norm": 8.628293289802969e-05, |
|
"learning_rate": 0.00019915246757295417, |
|
"loss": 46.0, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.018435696971766072, |
|
"grad_norm": 7.722469308646396e-05, |
|
"learning_rate": 0.00019913876230917975, |
|
"loss": 46.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.01857225769007545, |
|
"grad_norm": 4.240256384946406e-05, |
|
"learning_rate": 0.00019912494759917148, |
|
"loss": 46.0, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.018708818408384828, |
|
"grad_norm": 4.052065560244955e-05, |
|
"learning_rate": 0.00019911102345818046, |
|
"loss": 46.0, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.018845379126694206, |
|
"grad_norm": 7.86216405685991e-05, |
|
"learning_rate": 0.00019909698990157852, |
|
"loss": 46.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.018981939845003584, |
|
"grad_norm": 0.00010640006075846031, |
|
"learning_rate": 0.00019908284694485827, |
|
"loss": 46.0, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.019118500563312962, |
|
"grad_norm": 0.00015501640154980123, |
|
"learning_rate": 0.00019906859460363307, |
|
"loss": 46.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01925506128162234, |
|
"grad_norm": 7.59594258852303e-05, |
|
"learning_rate": 0.00019905423289363715, |
|
"loss": 46.0, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.01939162199993172, |
|
"grad_norm": 5.662858529831283e-05, |
|
"learning_rate": 0.0001990397618307254, |
|
"loss": 46.0, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.019528182718241097, |
|
"grad_norm": 3.2056228519650176e-05, |
|
"learning_rate": 0.00019902518143087342, |
|
"loss": 46.0, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.019664743436550475, |
|
"grad_norm": 6.27958943368867e-05, |
|
"learning_rate": 0.00019901049171017752, |
|
"loss": 46.0, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.019801304154859853, |
|
"grad_norm": 8.007367432583123e-05, |
|
"learning_rate": 0.00019899569268485472, |
|
"loss": 46.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.019937864873169234, |
|
"grad_norm": 6.982972263358533e-05, |
|
"learning_rate": 0.00019898078437124276, |
|
"loss": 46.0, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.020074425591478613, |
|
"grad_norm": 4.9886282795341685e-05, |
|
"learning_rate": 0.0001989657667857999, |
|
"loss": 46.0, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.02021098630978799, |
|
"grad_norm": 9.474890248384327e-05, |
|
"learning_rate": 0.0001989506399451051, |
|
"loss": 46.0, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.02034754702809737, |
|
"grad_norm": 0.00012795208021998405, |
|
"learning_rate": 0.00019893540386585804, |
|
"loss": 46.0, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.020484107746406747, |
|
"grad_norm": 8.636638813186437e-05, |
|
"learning_rate": 0.00019892005856487878, |
|
"loss": 46.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.020620668464716125, |
|
"grad_norm": 5.324094672687352e-05, |
|
"learning_rate": 0.00019890460405910815, |
|
"loss": 46.0, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.020757229183025503, |
|
"grad_norm": 5.794732351205312e-05, |
|
"learning_rate": 0.00019888904036560745, |
|
"loss": 46.0, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.02089378990133488, |
|
"grad_norm": 6.407459295587614e-05, |
|
"learning_rate": 0.0001988733675015585, |
|
"loss": 46.0, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.02103035061964426, |
|
"grad_norm": 5.669236270477995e-05, |
|
"learning_rate": 0.00019885758548426367, |
|
"loss": 46.0, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.021166911337953637, |
|
"grad_norm": 8.20392815512605e-05, |
|
"learning_rate": 0.0001988416943311459, |
|
"loss": 46.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.021303472056263015, |
|
"grad_norm": 4.120526136830449e-05, |
|
"learning_rate": 0.00019882569405974852, |
|
"loss": 46.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.021440032774572394, |
|
"grad_norm": 6.0489343013614416e-05, |
|
"learning_rate": 0.0001988095846877353, |
|
"loss": 46.0, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.02157659349288177, |
|
"grad_norm": 5.2780691476073116e-05, |
|
"learning_rate": 0.00019879336623289056, |
|
"loss": 46.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.02171315421119115, |
|
"grad_norm": 7.062828808557242e-05, |
|
"learning_rate": 0.00019877703871311903, |
|
"loss": 46.0, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.021849714929500528, |
|
"grad_norm": 8.607962081441656e-05, |
|
"learning_rate": 0.00019876060214644566, |
|
"loss": 46.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.021986275647809906, |
|
"grad_norm": 5.568426786339842e-05, |
|
"learning_rate": 0.0001987440565510161, |
|
"loss": 46.0, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.022122836366119284, |
|
"grad_norm": 2.0496960132732056e-05, |
|
"learning_rate": 0.00019872740194509607, |
|
"loss": 46.0, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.022259397084428666, |
|
"grad_norm": 0.0001443786604795605, |
|
"learning_rate": 0.0001987106383470718, |
|
"loss": 46.0, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.022395957802738044, |
|
"grad_norm": 0.00022565714607480913, |
|
"learning_rate": 0.00019869376577544984, |
|
"loss": 46.0, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.022532518521047422, |
|
"grad_norm": 4.196175359538756e-05, |
|
"learning_rate": 0.00019867678424885692, |
|
"loss": 46.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.0226690792393568, |
|
"grad_norm": 5.022220284445211e-05, |
|
"learning_rate": 0.0001986596937860402, |
|
"loss": 46.0, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.022805639957666178, |
|
"grad_norm": 6.462670717155561e-05, |
|
"learning_rate": 0.00019864249440586704, |
|
"loss": 46.0, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.022942200675975556, |
|
"grad_norm": 5.396630149334669e-05, |
|
"learning_rate": 0.00019862518612732502, |
|
"loss": 46.0, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.023078761394284934, |
|
"grad_norm": 7.057916081976146e-05, |
|
"learning_rate": 0.00019860776896952201, |
|
"loss": 46.0, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.023215322112594312, |
|
"grad_norm": 2.83908757410245e-05, |
|
"learning_rate": 0.00019859024295168593, |
|
"loss": 46.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02335188283090369, |
|
"grad_norm": 5.459811654873192e-05, |
|
"learning_rate": 0.0001985726080931651, |
|
"loss": 46.0, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.02348844354921307, |
|
"grad_norm": 9.622200013836846e-05, |
|
"learning_rate": 0.0001985548644134278, |
|
"loss": 46.0, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.023625004267522447, |
|
"grad_norm": 3.8583631976507604e-05, |
|
"learning_rate": 0.00019853701193206256, |
|
"loss": 46.0, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.023761564985831825, |
|
"grad_norm": 1.6933201550273225e-05, |
|
"learning_rate": 0.00019851905066877796, |
|
"loss": 46.0, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.023898125704141203, |
|
"grad_norm": 0.00014548096805810928, |
|
"learning_rate": 0.0001985009806434027, |
|
"loss": 46.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.02403468642245058, |
|
"grad_norm": 3.331439438625239e-05, |
|
"learning_rate": 0.00019848280187588556, |
|
"loss": 46.0, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.02417124714075996, |
|
"grad_norm": 0.00010656285303412005, |
|
"learning_rate": 0.00019846451438629536, |
|
"loss": 46.0, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.024307807859069337, |
|
"grad_norm": 5.0110294978367165e-05, |
|
"learning_rate": 0.00019844611819482095, |
|
"loss": 46.0, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.024444368577378715, |
|
"grad_norm": 8.202512981370091e-05, |
|
"learning_rate": 0.00019842761332177115, |
|
"loss": 46.0, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.024580929295688097, |
|
"grad_norm": 4.755376357934438e-05, |
|
"learning_rate": 0.00019840899978757485, |
|
"loss": 46.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.024717490013997475, |
|
"grad_norm": 7.185702270362526e-05, |
|
"learning_rate": 0.0001983902776127807, |
|
"loss": 46.0, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.024854050732306853, |
|
"grad_norm": 4.723266465589404e-05, |
|
"learning_rate": 0.00019837144681805757, |
|
"loss": 46.0, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.02499061145061623, |
|
"grad_norm": 0.00011025248386431485, |
|
"learning_rate": 0.000198352507424194, |
|
"loss": 46.0, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.02512717216892561, |
|
"grad_norm": 0.00010296511027263477, |
|
"learning_rate": 0.00019833345945209857, |
|
"loss": 46.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.025263732887234987, |
|
"grad_norm": 7.570455636596307e-05, |
|
"learning_rate": 0.00019831430292279966, |
|
"loss": 46.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.025400293605544366, |
|
"grad_norm": 8.105228334898129e-05, |
|
"learning_rate": 0.0001982950378574455, |
|
"loss": 46.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.025536854323853744, |
|
"grad_norm": 0.00013544574903789908, |
|
"learning_rate": 0.00019827566427730412, |
|
"loss": 46.0, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.02567341504216312, |
|
"grad_norm": 6.9964567956049e-05, |
|
"learning_rate": 0.00019825618220376342, |
|
"loss": 46.0, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.0258099757604725, |
|
"grad_norm": 9.811633208300918e-05, |
|
"learning_rate": 0.00019823659165833102, |
|
"loss": 46.0, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.025946536478781878, |
|
"grad_norm": 0.00023017756757326424, |
|
"learning_rate": 0.00019821689266263427, |
|
"loss": 46.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.026083097197091256, |
|
"grad_norm": 0.0003564099024515599, |
|
"learning_rate": 0.0001981970852384203, |
|
"loss": 46.0, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.026219657915400634, |
|
"grad_norm": 5.261121259536594e-05, |
|
"learning_rate": 0.00019817716940755586, |
|
"loss": 46.0, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.026356218633710012, |
|
"grad_norm": 0.00012212673027534038, |
|
"learning_rate": 0.00019815714519202753, |
|
"loss": 46.0, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.02649277935201939, |
|
"grad_norm": 7.185459253378212e-05, |
|
"learning_rate": 0.00019813701261394136, |
|
"loss": 46.0, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.02662934007032877, |
|
"grad_norm": 0.00010008271055994555, |
|
"learning_rate": 0.00019811677169552313, |
|
"loss": 46.0, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.026765900788638147, |
|
"grad_norm": 0.00010963875683955848, |
|
"learning_rate": 0.0001980964224591183, |
|
"loss": 46.0, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.026902461506947528, |
|
"grad_norm": 5.4636468121316284e-05, |
|
"learning_rate": 0.00019807596492719167, |
|
"loss": 46.0, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.027039022225256906, |
|
"grad_norm": 5.760273052146658e-05, |
|
"learning_rate": 0.00019805539912232784, |
|
"loss": 46.0, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.027175582943566284, |
|
"grad_norm": 0.0001271786750294268, |
|
"learning_rate": 0.00019803472506723085, |
|
"loss": 46.0, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.027312143661875662, |
|
"grad_norm": 0.00014666210336145014, |
|
"learning_rate": 0.00019801394278472418, |
|
"loss": 46.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02744870438018504, |
|
"grad_norm": 3.408119664527476e-05, |
|
"learning_rate": 0.0001979930522977509, |
|
"loss": 46.0, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.02758526509849442, |
|
"grad_norm": 0.00010019735782407224, |
|
"learning_rate": 0.00019797205362937347, |
|
"loss": 46.0, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.027721825816803797, |
|
"grad_norm": 0.00011540239211171865, |
|
"learning_rate": 0.00019795094680277378, |
|
"loss": 46.0, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.027858386535113175, |
|
"grad_norm": 8.116603567032143e-05, |
|
"learning_rate": 0.0001979297318412532, |
|
"loss": 46.0, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.027994947253422553, |
|
"grad_norm": 0.0001576711074449122, |
|
"learning_rate": 0.00019790840876823232, |
|
"loss": 46.0, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.02813150797173193, |
|
"grad_norm": 8.809396240394562e-05, |
|
"learning_rate": 0.0001978869776072512, |
|
"loss": 46.0, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.02826806869004131, |
|
"grad_norm": 0.0001056291293934919, |
|
"learning_rate": 0.00019786543838196924, |
|
"loss": 46.0, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.028404629408350687, |
|
"grad_norm": 0.0001859702606452629, |
|
"learning_rate": 0.00019784379111616507, |
|
"loss": 46.0, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.028541190126660065, |
|
"grad_norm": 7.50935505493544e-05, |
|
"learning_rate": 0.00019782203583373664, |
|
"loss": 46.0, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.028677750844969443, |
|
"grad_norm": 4.688434273703024e-05, |
|
"learning_rate": 0.00019780017255870114, |
|
"loss": 46.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02881431156327882, |
|
"grad_norm": 0.00032884004758670926, |
|
"learning_rate": 0.00019777820131519495, |
|
"loss": 46.0, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.0289508722815882, |
|
"grad_norm": 0.0001322666648775339, |
|
"learning_rate": 0.0001977561221274737, |
|
"loss": 46.0, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.029087432999897578, |
|
"grad_norm": 0.00014480279060080647, |
|
"learning_rate": 0.00019773393501991212, |
|
"loss": 46.0, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.02922399371820696, |
|
"grad_norm": 0.00011657484719762579, |
|
"learning_rate": 0.0001977116400170041, |
|
"loss": 46.0, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.029360554436516337, |
|
"grad_norm": 0.00011347379040671512, |
|
"learning_rate": 0.00019768923714336272, |
|
"loss": 46.0, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.029497115154825716, |
|
"grad_norm": 0.00017761997878551483, |
|
"learning_rate": 0.00019766672642372002, |
|
"loss": 46.0, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.029633675873135094, |
|
"grad_norm": 0.00014384661335498095, |
|
"learning_rate": 0.00019764410788292722, |
|
"loss": 46.0, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.029770236591444472, |
|
"grad_norm": 7.11917455191724e-05, |
|
"learning_rate": 0.00019762138154595446, |
|
"loss": 46.0, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.02990679730975385, |
|
"grad_norm": 0.00012330934987403452, |
|
"learning_rate": 0.00019759854743789097, |
|
"loss": 46.0, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.030043358028063228, |
|
"grad_norm": 8.032290497794747e-05, |
|
"learning_rate": 0.00019757560558394493, |
|
"loss": 46.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.030179918746372606, |
|
"grad_norm": 0.00021323734836187214, |
|
"learning_rate": 0.0001975525560094434, |
|
"loss": 46.0, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.030316479464681984, |
|
"grad_norm": 0.00014698925951961428, |
|
"learning_rate": 0.00019752939873983255, |
|
"loss": 46.0, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.030453040182991362, |
|
"grad_norm": 0.00018497723795007914, |
|
"learning_rate": 0.00019750613380067718, |
|
"loss": 46.0, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.03058960090130074, |
|
"grad_norm": 0.0001770486414898187, |
|
"learning_rate": 0.00019748276121766116, |
|
"loss": 46.0, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.03072616161961012, |
|
"grad_norm": 6.994479190325364e-05, |
|
"learning_rate": 0.00019745928101658707, |
|
"loss": 46.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.030862722337919497, |
|
"grad_norm": 0.0001687954500084743, |
|
"learning_rate": 0.00019743569322337642, |
|
"loss": 46.0, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.030999283056228875, |
|
"grad_norm": 0.00010247869795421138, |
|
"learning_rate": 0.00019741199786406938, |
|
"loss": 46.0, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.031135843774538253, |
|
"grad_norm": 0.00013670619227923453, |
|
"learning_rate": 0.00019738819496482494, |
|
"loss": 46.0, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.031272404492847634, |
|
"grad_norm": 0.00013827405928168446, |
|
"learning_rate": 0.0001973642845519208, |
|
"loss": 46.0, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.03140896521115701, |
|
"grad_norm": 0.00014026669668965042, |
|
"learning_rate": 0.00019734026665175334, |
|
"loss": 46.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.03154552592946639, |
|
"grad_norm": 0.00010120788647327572, |
|
"learning_rate": 0.00019731614129083754, |
|
"loss": 46.0, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.03168208664777577, |
|
"grad_norm": 0.00014757270400878042, |
|
"learning_rate": 0.0001972919084958072, |
|
"loss": 46.0, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.03181864736608515, |
|
"grad_norm": 8.340936619788408e-05, |
|
"learning_rate": 0.00019726756829341446, |
|
"loss": 46.0, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.031955208084394525, |
|
"grad_norm": 0.00013478368055075407, |
|
"learning_rate": 0.0001972431207105303, |
|
"loss": 46.0, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.0320917688027039, |
|
"grad_norm": 0.00013591728929895908, |
|
"learning_rate": 0.00019721856577414407, |
|
"loss": 46.0, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.03222832952101328, |
|
"grad_norm": 0.0002845980925485492, |
|
"learning_rate": 0.00019719390351136365, |
|
"loss": 46.0, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.03236489023932266, |
|
"grad_norm": 0.00021167262457311153, |
|
"learning_rate": 0.0001971691339494155, |
|
"loss": 46.0, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.03250145095763204, |
|
"grad_norm": 0.00015637895558029413, |
|
"learning_rate": 0.00019714425711564446, |
|
"loss": 46.0, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.032638011675941415, |
|
"grad_norm": 0.0004328020440880209, |
|
"learning_rate": 0.00019711927303751382, |
|
"loss": 46.0, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.032774572394250794, |
|
"grad_norm": 7.109026773832738e-05, |
|
"learning_rate": 0.0001970941817426052, |
|
"loss": 46.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.03291113311256017, |
|
"grad_norm": 0.00022207674919627607, |
|
"learning_rate": 0.00019706898325861874, |
|
"loss": 46.0, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.03304769383086955, |
|
"grad_norm": 7.223385910037905e-05, |
|
"learning_rate": 0.0001970436776133727, |
|
"loss": 46.0, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.03318425454917893, |
|
"grad_norm": 0.0002725286176428199, |
|
"learning_rate": 0.00019701826483480388, |
|
"loss": 46.0, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.033320815267488306, |
|
"grad_norm": 0.00011271587572991848, |
|
"learning_rate": 0.00019699274495096712, |
|
"loss": 46.0, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.033457375985797684, |
|
"grad_norm": 0.0001713872916297987, |
|
"learning_rate": 0.0001969671179900357, |
|
"loss": 46.0, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.03359393670410706, |
|
"grad_norm": 0.0001251544599654153, |
|
"learning_rate": 0.00019694138398030094, |
|
"loss": 46.0, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.03373049742241644, |
|
"grad_norm": 0.00030473063816316426, |
|
"learning_rate": 0.00019691554295017246, |
|
"loss": 46.0, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.03386705814072582, |
|
"grad_norm": 0.00020300064352340996, |
|
"learning_rate": 0.000196889594928178, |
|
"loss": 46.0, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.034003618859035196, |
|
"grad_norm": 0.0001950880396179855, |
|
"learning_rate": 0.00019686353994296333, |
|
"loss": 46.0, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.034140179577344575, |
|
"grad_norm": 0.00034574157325550914, |
|
"learning_rate": 0.00019683737802329244, |
|
"loss": 46.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03427674029565395, |
|
"grad_norm": 0.0001567787694511935, |
|
"learning_rate": 0.0001968111091980473, |
|
"loss": 46.0, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.03441330101396333, |
|
"grad_norm": 0.00012498951400630176, |
|
"learning_rate": 0.00019678473349622793, |
|
"loss": 46.0, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.03454986173227271, |
|
"grad_norm": 0.0002493293723091483, |
|
"learning_rate": 0.0001967582509469523, |
|
"loss": 46.0, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.03468642245058209, |
|
"grad_norm": 0.0003348199534229934, |
|
"learning_rate": 0.0001967316615794563, |
|
"loss": 46.0, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.034822983168891465, |
|
"grad_norm": 0.00020134066289756447, |
|
"learning_rate": 0.00019670496542309384, |
|
"loss": 46.0, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.03495954388720084, |
|
"grad_norm": 8.074977085925639e-05, |
|
"learning_rate": 0.0001966781625073367, |
|
"loss": 46.0, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.03509610460551023, |
|
"grad_norm": 0.00018040316354017705, |
|
"learning_rate": 0.00019665125286177449, |
|
"loss": 46.0, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.035232665323819606, |
|
"grad_norm": 0.00016175376367755234, |
|
"learning_rate": 0.00019662423651611464, |
|
"loss": 46.0, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.035369226042128984, |
|
"grad_norm": 6.748022133251652e-05, |
|
"learning_rate": 0.00019659711350018239, |
|
"loss": 46.0, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.03550578676043836, |
|
"grad_norm": 0.00014227218343876302, |
|
"learning_rate": 0.00019656988384392075, |
|
"loss": 46.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.03564234747874774, |
|
"grad_norm": 0.0002607603382784873, |
|
"learning_rate": 0.00019654254757739043, |
|
"loss": 46.0, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.03577890819705712, |
|
"grad_norm": 0.00026646576588973403, |
|
"learning_rate": 0.00019651510473076987, |
|
"loss": 46.0, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.0359154689153665, |
|
"grad_norm": 0.00020209423382766545, |
|
"learning_rate": 0.00019648755533435518, |
|
"loss": 46.0, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.036052029633675875, |
|
"grad_norm": 0.00013208483869675547, |
|
"learning_rate": 0.00019645989941855999, |
|
"loss": 46.0, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.03618859035198525, |
|
"grad_norm": 0.0002046150912065059, |
|
"learning_rate": 0.00019643213701391567, |
|
"loss": 46.0, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.03632515107029463, |
|
"grad_norm": 0.00022820701997261494, |
|
"learning_rate": 0.00019640426815107108, |
|
"loss": 46.0, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.03646171178860401, |
|
"grad_norm": 0.00019359646830707788, |
|
"learning_rate": 0.0001963762928607926, |
|
"loss": 46.0, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.03659827250691339, |
|
"grad_norm": 0.00015012026415206492, |
|
"learning_rate": 0.0001963482111739641, |
|
"loss": 46.0, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.036734833225222766, |
|
"grad_norm": 0.00021516659762710333, |
|
"learning_rate": 0.00019632002312158697, |
|
"loss": 46.0, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.036871393943532144, |
|
"grad_norm": 0.00019021316256839782, |
|
"learning_rate": 0.00019629172873477995, |
|
"loss": 46.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.03700795466184152, |
|
"grad_norm": 0.00023805341334082186, |
|
"learning_rate": 0.00019626332804477915, |
|
"loss": 46.0, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.0371445153801509, |
|
"grad_norm": 0.0002716032904572785, |
|
"learning_rate": 0.0001962348210829382, |
|
"loss": 46.0, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.03728107609846028, |
|
"grad_norm": 0.0001799121528165415, |
|
"learning_rate": 0.00019620620788072783, |
|
"loss": 46.0, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.037417636816769656, |
|
"grad_norm": 0.00041423438233323395, |
|
"learning_rate": 0.0001961774884697362, |
|
"loss": 46.0, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.037554197535079034, |
|
"grad_norm": 0.0002172417298424989, |
|
"learning_rate": 0.0001961486628816687, |
|
"loss": 46.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.03769075825338841, |
|
"grad_norm": 0.0002554766833782196, |
|
"learning_rate": 0.0001961197311483479, |
|
"loss": 46.0, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.03782731897169779, |
|
"grad_norm": 0.0002692249254323542, |
|
"learning_rate": 0.0001960906933017135, |
|
"loss": 46.0, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.03796387969000717, |
|
"grad_norm": 0.00040551606798544526, |
|
"learning_rate": 0.00019606154937382256, |
|
"loss": 46.0, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.03810044040831655, |
|
"grad_norm": 0.0003460289444774389, |
|
"learning_rate": 0.000196032299396849, |
|
"loss": 46.0, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.038237001126625925, |
|
"grad_norm": 0.000246795651037246, |
|
"learning_rate": 0.00019600294340308398, |
|
"loss": 46.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.0383735618449353, |
|
"grad_norm": 0.0005187865463085473, |
|
"learning_rate": 0.00019597348142493562, |
|
"loss": 46.0, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.03851012256324468, |
|
"grad_norm": 0.0005177973653189838, |
|
"learning_rate": 0.00019594391349492902, |
|
"loss": 46.0, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.03864668328155406, |
|
"grad_norm": 0.00031362145091407, |
|
"learning_rate": 0.00019591423964570632, |
|
"loss": 46.0, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.03878324399986344, |
|
"grad_norm": 0.0003161428030580282, |
|
"learning_rate": 0.0001958844599100266, |
|
"loss": 46.0, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.038919804718172815, |
|
"grad_norm": 0.0002518398978281766, |
|
"learning_rate": 0.00019585457432076578, |
|
"loss": 46.0, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.03905636543648219, |
|
"grad_norm": 0.00017934896459337324, |
|
"learning_rate": 0.00019582458291091663, |
|
"loss": 46.0, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.03919292615479157, |
|
"grad_norm": 0.00032432653824798763, |
|
"learning_rate": 0.0001957944857135888, |
|
"loss": 46.0, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.03932948687310095, |
|
"grad_norm": 0.00021099011064507067, |
|
"learning_rate": 0.00019576428276200868, |
|
"loss": 46.0, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.03946604759141033, |
|
"grad_norm": 0.0005440306267701089, |
|
"learning_rate": 0.00019573397408951943, |
|
"loss": 46.0, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.039602608309719706, |
|
"grad_norm": 0.0005642606993205845, |
|
"learning_rate": 0.00019570355972958097, |
|
"loss": 46.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.03973916902802909, |
|
"grad_norm": 0.0007911530556157231, |
|
"learning_rate": 0.00019567303971576976, |
|
"loss": 46.0, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.03987572974633847, |
|
"grad_norm": 0.00017880380619317293, |
|
"learning_rate": 0.000195642414081779, |
|
"loss": 46.0, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.04001229046464785, |
|
"grad_norm": 0.00048157072160393, |
|
"learning_rate": 0.00019561168286141856, |
|
"loss": 46.0, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.040148851182957225, |
|
"grad_norm": 0.00010301580186933279, |
|
"learning_rate": 0.00019558084608861472, |
|
"loss": 46.0, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.0402854119012666, |
|
"grad_norm": 0.0003147012903355062, |
|
"learning_rate": 0.00019554990379741033, |
|
"loss": 46.0, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.04042197261957598, |
|
"grad_norm": 0.00039921182906255126, |
|
"learning_rate": 0.0001955188560219648, |
|
"loss": 46.0, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.04055853333788536, |
|
"grad_norm": 0.0004582660039886832, |
|
"learning_rate": 0.00019548770279655397, |
|
"loss": 46.0, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.04069509405619474, |
|
"grad_norm": 0.0001638657267903909, |
|
"learning_rate": 0.00019545644415557, |
|
"loss": 46.0, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.040831654774504116, |
|
"grad_norm": 0.00041633055661804974, |
|
"learning_rate": 0.00019542508013352156, |
|
"loss": 46.0, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.040968215492813494, |
|
"grad_norm": 0.0005134938983246684, |
|
"learning_rate": 0.0001953936107650336, |
|
"loss": 46.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.040968215492813494, |
|
"eval_loss": 11.5, |
|
"eval_runtime": 20.4404, |
|
"eval_samples_per_second": 150.878, |
|
"eval_steps_per_second": 75.439, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04110477621112287, |
|
"grad_norm": 0.00037575350143015385, |
|
"learning_rate": 0.0001953620360848473, |
|
"loss": 46.0, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.04124133692943225, |
|
"grad_norm": 0.00024376453075092286, |
|
"learning_rate": 0.00019533035612782017, |
|
"loss": 46.0, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.04137789764774163, |
|
"grad_norm": 0.00043027085484936833, |
|
"learning_rate": 0.00019529857092892602, |
|
"loss": 46.0, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.041514458366051006, |
|
"grad_norm": 0.00039237432065419853, |
|
"learning_rate": 0.00019526668052325467, |
|
"loss": 46.0, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.041651019084360384, |
|
"grad_norm": 0.0001755830307956785, |
|
"learning_rate": 0.00019523468494601223, |
|
"loss": 46.0, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.04178757980266976, |
|
"grad_norm": 0.0001863718789536506, |
|
"learning_rate": 0.00019520258423252082, |
|
"loss": 46.0, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.04192414052097914, |
|
"grad_norm": 0.0003355692024342716, |
|
"learning_rate": 0.00019517037841821873, |
|
"loss": 46.0, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.04206070123928852, |
|
"grad_norm": 0.0002629165828693658, |
|
"learning_rate": 0.00019513806753866016, |
|
"loss": 46.0, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.0421972619575979, |
|
"grad_norm": 0.0004353369877208024, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 46.0, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.042333822675907275, |
|
"grad_norm": 0.0001725061738397926, |
|
"learning_rate": 0.00019507313072657055, |
|
"loss": 46.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.04247038339421665, |
|
"grad_norm": 0.00021753301552962512, |
|
"learning_rate": 0.00019504050486572784, |
|
"loss": 46.0, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.04260694411252603, |
|
"grad_norm": 0.00035614983062259853, |
|
"learning_rate": 0.00019500777408300519, |
|
"loss": 46.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.04274350483083541, |
|
"grad_norm": 0.00025182642275467515, |
|
"learning_rate": 0.00019497493841453642, |
|
"loss": 46.0, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.04288006554914479, |
|
"grad_norm": 0.0003710365854203701, |
|
"learning_rate": 0.0001949419978965711, |
|
"loss": 46.0, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.043016626267454165, |
|
"grad_norm": 0.00031021906761452556, |
|
"learning_rate": 0.00019490895256547464, |
|
"loss": 46.0, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.04315318698576354, |
|
"grad_norm": 0.0002598558203317225, |
|
"learning_rate": 0.000194875802457728, |
|
"loss": 46.0, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.04328974770407292, |
|
"grad_norm": 0.00025477109011262655, |
|
"learning_rate": 0.000194842547609928, |
|
"loss": 46.0, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.0434263084223823, |
|
"grad_norm": 0.0003966864896938205, |
|
"learning_rate": 0.00019480918805878697, |
|
"loss": 46.0, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.04356286914069168, |
|
"grad_norm": 0.0001289808569708839, |
|
"learning_rate": 0.00019477572384113282, |
|
"loss": 46.0, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.043699429859001056, |
|
"grad_norm": 0.00031020533060655, |
|
"learning_rate": 0.00019474215499390912, |
|
"loss": 46.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.043835990577310434, |
|
"grad_norm": 0.00045745153329335153, |
|
"learning_rate": 0.0001947084815541748, |
|
"loss": 46.0, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.04397255129561981, |
|
"grad_norm": 0.00031357730040326715, |
|
"learning_rate": 0.00019467470355910438, |
|
"loss": 46.0, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.04410911201392919, |
|
"grad_norm": 0.00021334455232135952, |
|
"learning_rate": 0.00019464082104598776, |
|
"loss": 46.0, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.04424567273223857, |
|
"grad_norm": 0.00033166687353514135, |
|
"learning_rate": 0.0001946068340522302, |
|
"loss": 46.0, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.04438223345054795, |
|
"grad_norm": 0.0004193445493001491, |
|
"learning_rate": 0.00019457274261535236, |
|
"loss": 46.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.04451879416885733, |
|
"grad_norm": 0.0005776687175966799, |
|
"learning_rate": 0.0001945385467729901, |
|
"loss": 46.0, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.04465535488716671, |
|
"grad_norm": 0.00021739969088230282, |
|
"learning_rate": 0.00019450424656289466, |
|
"loss": 46.0, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.04479191560547609, |
|
"grad_norm": 0.00025186152197420597, |
|
"learning_rate": 0.00019446984202293246, |
|
"loss": 46.0, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.044928476323785466, |
|
"grad_norm": 0.0004120633821003139, |
|
"learning_rate": 0.00019443533319108504, |
|
"loss": 46.0, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.045065037042094844, |
|
"grad_norm": 0.00028226643917150795, |
|
"learning_rate": 0.00019440072010544918, |
|
"loss": 46.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.04520159776040422, |
|
"grad_norm": 0.00048744879313744605, |
|
"learning_rate": 0.00019436600280423665, |
|
"loss": 46.0, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.0453381584787136, |
|
"grad_norm": 0.00019058110774494708, |
|
"learning_rate": 0.0001943311813257743, |
|
"loss": 46.0, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.04547471919702298, |
|
"grad_norm": 0.00047706879558973014, |
|
"learning_rate": 0.00019429625570850404, |
|
"loss": 46.0, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.045611279915332356, |
|
"grad_norm": 0.0005120193236507475, |
|
"learning_rate": 0.0001942612259909827, |
|
"loss": 46.0, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.045747840633641734, |
|
"grad_norm": 0.0002128657652065158, |
|
"learning_rate": 0.00019422609221188207, |
|
"loss": 46.0, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.04588440135195111, |
|
"grad_norm": 0.0005021935794502497, |
|
"learning_rate": 0.00019419085440998873, |
|
"loss": 46.0, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.04602096207026049, |
|
"grad_norm": 0.000529598502907902, |
|
"learning_rate": 0.00019415551262420418, |
|
"loss": 46.0, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.04615752278856987, |
|
"grad_norm": 0.000645966618321836, |
|
"learning_rate": 0.0001941200668935447, |
|
"loss": 46.0, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.04629408350687925, |
|
"grad_norm": 0.0012579227332025766, |
|
"learning_rate": 0.00019408451725714136, |
|
"loss": 46.0, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.046430644225188625, |
|
"grad_norm": 0.0003929708036594093, |
|
"learning_rate": 0.00019404886375423984, |
|
"loss": 46.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.046567204943498, |
|
"grad_norm": 0.0008281446644105017, |
|
"learning_rate": 0.00019401310642420058, |
|
"loss": 46.0, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.04670376566180738, |
|
"grad_norm": 0.0003685842384584248, |
|
"learning_rate": 0.00019397724530649857, |
|
"loss": 46.0, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.04684032638011676, |
|
"grad_norm": 0.00041180921834893525, |
|
"learning_rate": 0.00019394128044072345, |
|
"loss": 46.0, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.04697688709842614, |
|
"grad_norm": 0.00038018723716959357, |
|
"learning_rate": 0.00019390521186657934, |
|
"loss": 46.0, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.047113447816735515, |
|
"grad_norm": 0.0006593601428903639, |
|
"learning_rate": 0.00019386903962388487, |
|
"loss": 46.0, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.04725000853504489, |
|
"grad_norm": 0.0003346616867929697, |
|
"learning_rate": 0.0001938327637525731, |
|
"loss": 46.0, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.04738656925335427, |
|
"grad_norm": 0.0006303668487817049, |
|
"learning_rate": 0.00019379638429269157, |
|
"loss": 46.0, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.04752312997166365, |
|
"grad_norm": 0.0009288009605370462, |
|
"learning_rate": 0.00019375990128440204, |
|
"loss": 46.0, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.04765969068997303, |
|
"grad_norm": 0.0009077245485968888, |
|
"learning_rate": 0.00019372331476798072, |
|
"loss": 46.0, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.047796251408282406, |
|
"grad_norm": 0.0006878585554659367, |
|
"learning_rate": 0.00019368662478381799, |
|
"loss": 46.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.047932812126591784, |
|
"grad_norm": 0.0005428345175459981, |
|
"learning_rate": 0.00019364983137241853, |
|
"loss": 46.0, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.04806937284490116, |
|
"grad_norm": 0.0005264700739644468, |
|
"learning_rate": 0.0001936129345744011, |
|
"loss": 46.0, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.04820593356321054, |
|
"grad_norm": 0.00034603691892698407, |
|
"learning_rate": 0.00019357593443049877, |
|
"loss": 46.0, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.04834249428151992, |
|
"grad_norm": 0.0005551993381232023, |
|
"learning_rate": 0.00019353883098155854, |
|
"loss": 46.0, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.048479054999829296, |
|
"grad_norm": 0.0005989357596263289, |
|
"learning_rate": 0.0001935016242685415, |
|
"loss": 46.0, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.048615615718138674, |
|
"grad_norm": 0.0003730835160240531, |
|
"learning_rate": 0.00019346431433252276, |
|
"loss": 46.0, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.04875217643644805, |
|
"grad_norm": 0.0006811637431383133, |
|
"learning_rate": 0.00019342690121469138, |
|
"loss": 46.0, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.04888873715475743, |
|
"grad_norm": 0.000448873353889212, |
|
"learning_rate": 0.0001933893849563503, |
|
"loss": 46.0, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.049025297873066816, |
|
"grad_norm": 0.00046228739665821195, |
|
"learning_rate": 0.0001933517655989164, |
|
"loss": 46.0, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.049161858591376194, |
|
"grad_norm": 0.000341800187015906, |
|
"learning_rate": 0.00019331404318392027, |
|
"loss": 46.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.04929841930968557, |
|
"grad_norm": 0.0007170014432631433, |
|
"learning_rate": 0.00019327621775300637, |
|
"loss": 46.0, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.04943498002799495, |
|
"grad_norm": 0.0009279394871555269, |
|
"learning_rate": 0.00019323828934793286, |
|
"loss": 46.0, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.04957154074630433, |
|
"grad_norm": 0.0007022646022960544, |
|
"learning_rate": 0.0001932002580105715, |
|
"loss": 46.0, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.049708101464613706, |
|
"grad_norm": 0.0005995425744913518, |
|
"learning_rate": 0.0001931621237829078, |
|
"loss": 46.0, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.049844662182923084, |
|
"grad_norm": 0.0006767901941202581, |
|
"learning_rate": 0.00019312388670704081, |
|
"loss": 46.0, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.04998122290123246, |
|
"grad_norm": 0.00040845770854502916, |
|
"learning_rate": 0.00019308554682518313, |
|
"loss": 46.0, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.05011778361954184, |
|
"grad_norm": 0.00040534368599765003, |
|
"learning_rate": 0.00019304710417966079, |
|
"loss": 46.0, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.05025434433785122, |
|
"grad_norm": 0.00040592235745862126, |
|
"learning_rate": 0.0001930085588129134, |
|
"loss": 46.0, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.0503909050561606, |
|
"grad_norm": 0.00048625541967339814, |
|
"learning_rate": 0.00019296991076749381, |
|
"loss": 46.0, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.050527465774469975, |
|
"grad_norm": 0.0010227779857814312, |
|
"learning_rate": 0.00019293116008606837, |
|
"loss": 46.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.05066402649277935, |
|
"grad_norm": 0.0005206182249821723, |
|
"learning_rate": 0.00019289230681141667, |
|
"loss": 46.0, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.05080058721108873, |
|
"grad_norm": 0.0005789480055682361, |
|
"learning_rate": 0.00019285335098643153, |
|
"loss": 46.0, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.05093714792939811, |
|
"grad_norm": 0.00046148046385496855, |
|
"learning_rate": 0.00019281429265411907, |
|
"loss": 46.0, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.05107370864770749, |
|
"grad_norm": 0.0005185164045542479, |
|
"learning_rate": 0.00019277513185759844, |
|
"loss": 46.0, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.051210269366016865, |
|
"grad_norm": 0.0005086156306788325, |
|
"learning_rate": 0.0001927358686401021, |
|
"loss": 46.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.05134683008432624, |
|
"grad_norm": 0.0005038722883909941, |
|
"learning_rate": 0.0001926965030449754, |
|
"loss": 46.0, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.05148339080263562, |
|
"grad_norm": 0.001132496865466237, |
|
"learning_rate": 0.00019265703511567677, |
|
"loss": 46.0, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.051619951520945, |
|
"grad_norm": 0.0011954177170991898, |
|
"learning_rate": 0.00019261746489577765, |
|
"loss": 46.0, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.05175651223925438, |
|
"grad_norm": 0.0006181654753163457, |
|
"learning_rate": 0.0001925777924289624, |
|
"loss": 46.0, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.051893072957563756, |
|
"grad_norm": 0.0005807424895465374, |
|
"learning_rate": 0.00019253801775902824, |
|
"loss": 46.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.052029633675873134, |
|
"grad_norm": 0.0007045441307127476, |
|
"learning_rate": 0.00019249814092988515, |
|
"loss": 46.0, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.05216619439418251, |
|
"grad_norm": 0.0005537315737456083, |
|
"learning_rate": 0.00019245816198555605, |
|
"loss": 46.0, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.05230275511249189, |
|
"grad_norm": 0.0006181203643791378, |
|
"learning_rate": 0.00019241808097017642, |
|
"loss": 46.0, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.05243931583080127, |
|
"grad_norm": 0.0005752414581365883, |
|
"learning_rate": 0.00019237789792799458, |
|
"loss": 46.0, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.052575876549110646, |
|
"grad_norm": 0.0012608635006472468, |
|
"learning_rate": 0.00019233761290337134, |
|
"loss": 46.0, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.052712437267420025, |
|
"grad_norm": 0.001415454433299601, |
|
"learning_rate": 0.0001922972259407802, |
|
"loss": 46.0, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.0528489979857294, |
|
"grad_norm": 0.001285754144191742, |
|
"learning_rate": 0.00019225673708480717, |
|
"loss": 46.0, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.05298555870403878, |
|
"grad_norm": 0.0007671714411117136, |
|
"learning_rate": 0.00019221614638015075, |
|
"loss": 46.0, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.05312211942234816, |
|
"grad_norm": 0.0009842630242928863, |
|
"learning_rate": 0.0001921754538716218, |
|
"loss": 46.0, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.05325868014065754, |
|
"grad_norm": 0.0023981237318366766, |
|
"learning_rate": 0.00019213465960414368, |
|
"loss": 46.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.053395240858966915, |
|
"grad_norm": 0.0003951654944103211, |
|
"learning_rate": 0.00019209376362275206, |
|
"loss": 46.0, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.05353180157727629, |
|
"grad_norm": 0.0011894862400367856, |
|
"learning_rate": 0.00019205276597259485, |
|
"loss": 46.0, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.05366836229558568, |
|
"grad_norm": 0.0006607277318835258, |
|
"learning_rate": 0.00019201166669893227, |
|
"loss": 46.0, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.053804923013895056, |
|
"grad_norm": 0.0026427984703332186, |
|
"learning_rate": 0.00019197046584713663, |
|
"loss": 46.0, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.053941483732204434, |
|
"grad_norm": 0.0005352182779461145, |
|
"learning_rate": 0.00019192916346269246, |
|
"loss": 46.0, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.05407804445051381, |
|
"grad_norm": 0.0007665826706215739, |
|
"learning_rate": 0.00019188775959119643, |
|
"loss": 46.0, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.05421460516882319, |
|
"grad_norm": 0.0006750879692845047, |
|
"learning_rate": 0.0001918462542783571, |
|
"loss": 46.0, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.05435116588713257, |
|
"grad_norm": 0.0007010844419710338, |
|
"learning_rate": 0.0001918046475699951, |
|
"loss": 46.0, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.05448772660544195, |
|
"grad_norm": 0.0007815820863470435, |
|
"learning_rate": 0.00019176293951204303, |
|
"loss": 46.0, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.054624287323751325, |
|
"grad_norm": 0.0012023310409858823, |
|
"learning_rate": 0.00019172113015054532, |
|
"loss": 46.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0547608480420607, |
|
"grad_norm": 0.0008429251029156148, |
|
"learning_rate": 0.00019167921953165825, |
|
"loss": 46.0, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.05489740876037008, |
|
"grad_norm": 0.0011086566373705864, |
|
"learning_rate": 0.00019163720770164991, |
|
"loss": 46.0, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.05503396947867946, |
|
"grad_norm": 0.0006057535065338016, |
|
"learning_rate": 0.00019159509470690012, |
|
"loss": 46.0, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.05517053019698884, |
|
"grad_norm": 0.0005650485400110483, |
|
"learning_rate": 0.0001915528805939003, |
|
"loss": 46.0, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.055307090915298215, |
|
"grad_norm": 0.00042634617420844734, |
|
"learning_rate": 0.00019151056540925364, |
|
"loss": 46.0, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.055443651633607594, |
|
"grad_norm": 0.0010889278491958976, |
|
"learning_rate": 0.0001914681491996748, |
|
"loss": 46.0, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.05558021235191697, |
|
"grad_norm": 0.0005564565653912723, |
|
"learning_rate": 0.00019142563201199008, |
|
"loss": 46.0, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.05571677307022635, |
|
"grad_norm": 0.0007656107773073018, |
|
"learning_rate": 0.0001913830138931371, |
|
"loss": 46.0, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.05585333378853573, |
|
"grad_norm": 0.0008188536739908159, |
|
"learning_rate": 0.00019134029489016502, |
|
"loss": 46.0, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.055989894506845106, |
|
"grad_norm": 0.0009277886711061001, |
|
"learning_rate": 0.00019129747505023436, |
|
"loss": 46.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.056126455225154484, |
|
"grad_norm": 0.0008007950964383781, |
|
"learning_rate": 0.00019125455442061691, |
|
"loss": 46.0, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.05626301594346386, |
|
"grad_norm": 0.000565837137401104, |
|
"learning_rate": 0.00019121153304869584, |
|
"loss": 46.0, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.05639957666177324, |
|
"grad_norm": 0.0009090682142414153, |
|
"learning_rate": 0.00019116841098196536, |
|
"loss": 46.0, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.05653613738008262, |
|
"grad_norm": 0.0010395282879471779, |
|
"learning_rate": 0.000191125188268031, |
|
"loss": 46.0, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.056672698098391996, |
|
"grad_norm": 0.0004872768186032772, |
|
"learning_rate": 0.00019108186495460933, |
|
"loss": 46.0, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.056809258816701375, |
|
"grad_norm": 0.0007016431773081422, |
|
"learning_rate": 0.00019103844108952803, |
|
"loss": 46.0, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.05694581953501075, |
|
"grad_norm": 0.0005011600442230701, |
|
"learning_rate": 0.00019099491672072566, |
|
"loss": 46.0, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.05708238025332013, |
|
"grad_norm": 0.0009304819977842271, |
|
"learning_rate": 0.00019095129189625193, |
|
"loss": 46.0, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.05721894097162951, |
|
"grad_norm": 0.0009160469635389745, |
|
"learning_rate": 0.0001909075666642673, |
|
"loss": 46.0, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.05735550168993889, |
|
"grad_norm": 0.001117922831326723, |
|
"learning_rate": 0.00019086374107304312, |
|
"loss": 46.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.057492062408248265, |
|
"grad_norm": 0.0009813562501221895, |
|
"learning_rate": 0.00019081981517096152, |
|
"loss": 46.0, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.05762862312655764, |
|
"grad_norm": 0.0006970268441364169, |
|
"learning_rate": 0.00019077578900651544, |
|
"loss": 46.0, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.05776518384486702, |
|
"grad_norm": 0.0004595222999341786, |
|
"learning_rate": 0.00019073166262830834, |
|
"loss": 46.0, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.0579017445631764, |
|
"grad_norm": 0.0009433355298824608, |
|
"learning_rate": 0.00019068743608505455, |
|
"loss": 46.0, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.05803830528148578, |
|
"grad_norm": 0.001090813777409494, |
|
"learning_rate": 0.00019064310942557878, |
|
"loss": 46.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.058174865999795156, |
|
"grad_norm": 0.00044983444968238473, |
|
"learning_rate": 0.0001905986826988164, |
|
"loss": 46.0, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.05831142671810454, |
|
"grad_norm": 0.0008178472053259611, |
|
"learning_rate": 0.00019055415595381305, |
|
"loss": 46.0, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.05844798743641392, |
|
"grad_norm": 0.0005727699608542025, |
|
"learning_rate": 0.0001905095292397251, |
|
"loss": 46.0, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.0585845481547233, |
|
"grad_norm": 0.0013587451539933681, |
|
"learning_rate": 0.00019046480260581902, |
|
"loss": 46.0, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.058721108873032675, |
|
"grad_norm": 0.0004678687546402216, |
|
"learning_rate": 0.00019041997610147167, |
|
"loss": 46.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.05885766959134205, |
|
"grad_norm": 0.00045540923019871116, |
|
"learning_rate": 0.0001903750497761702, |
|
"loss": 46.0, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.05899423030965143, |
|
"grad_norm": 0.0006064171902835369, |
|
"learning_rate": 0.00019033002367951194, |
|
"loss": 46.0, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.05913079102796081, |
|
"grad_norm": 0.0002765974495559931, |
|
"learning_rate": 0.0001902848978612043, |
|
"loss": 46.0, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.05926735174627019, |
|
"grad_norm": 0.000879188475664705, |
|
"learning_rate": 0.00019023967237106492, |
|
"loss": 46.0, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.059403912464579565, |
|
"grad_norm": 0.0005185411428101361, |
|
"learning_rate": 0.00019019434725902137, |
|
"loss": 46.0, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.059540473182888944, |
|
"grad_norm": 0.0008257310255430639, |
|
"learning_rate": 0.00019014892257511118, |
|
"loss": 46.0, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.05967703390119832, |
|
"grad_norm": 0.0009346020524390042, |
|
"learning_rate": 0.00019010339836948186, |
|
"loss": 46.0, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.0598135946195077, |
|
"grad_norm": 0.0010087540140375495, |
|
"learning_rate": 0.00019005777469239076, |
|
"loss": 46.0, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.05995015533781708, |
|
"grad_norm": 0.0015897548291832209, |
|
"learning_rate": 0.00019001205159420513, |
|
"loss": 46.0, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.060086716056126456, |
|
"grad_norm": 0.0007276605465449393, |
|
"learning_rate": 0.0001899662291254018, |
|
"loss": 46.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.060223276774435834, |
|
"grad_norm": 0.0019181488314643502, |
|
"learning_rate": 0.00018992030733656746, |
|
"loss": 46.0, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.06035983749274521, |
|
"grad_norm": 0.0009860562859103084, |
|
"learning_rate": 0.00018987428627839843, |
|
"loss": 46.0, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.06049639821105459, |
|
"grad_norm": 0.0009881850564852357, |
|
"learning_rate": 0.0001898281660017005, |
|
"loss": 46.0, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.06063295892936397, |
|
"grad_norm": 0.0016714398516342044, |
|
"learning_rate": 0.00018978194655738917, |
|
"loss": 46.0, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.060769519647673347, |
|
"grad_norm": 0.000832723337225616, |
|
"learning_rate": 0.00018973562799648927, |
|
"loss": 46.0, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.060906080365982725, |
|
"grad_norm": 0.0007889857515692711, |
|
"learning_rate": 0.00018968921037013512, |
|
"loss": 46.0, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.0610426410842921, |
|
"grad_norm": 0.001141861779615283, |
|
"learning_rate": 0.00018964269372957038, |
|
"loss": 46.0, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.06117920180260148, |
|
"grad_norm": 0.001163587556220591, |
|
"learning_rate": 0.00018959607812614807, |
|
"loss": 46.0, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.06131576252091086, |
|
"grad_norm": 0.0006048906943760812, |
|
"learning_rate": 0.0001895493636113304, |
|
"loss": 46.0, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.06145232323922024, |
|
"grad_norm": 0.0009424586896784604, |
|
"learning_rate": 0.00018950255023668876, |
|
"loss": 46.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.061588883957529615, |
|
"grad_norm": 0.000706047285348177, |
|
"learning_rate": 0.00018945563805390381, |
|
"loss": 46.0, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.06172544467583899, |
|
"grad_norm": 0.000651683600153774, |
|
"learning_rate": 0.00018940862711476513, |
|
"loss": 46.0, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.06186200539414837, |
|
"grad_norm": 0.001372483093291521, |
|
"learning_rate": 0.00018936151747117141, |
|
"loss": 46.0, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.06199856611245775, |
|
"grad_norm": 0.0011862257961183786, |
|
"learning_rate": 0.0001893143091751303, |
|
"loss": 46.0, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.06213512683076713, |
|
"grad_norm": 0.0015178897883743048, |
|
"learning_rate": 0.00018926700227875833, |
|
"loss": 46.0, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.062271687549076506, |
|
"grad_norm": 0.0008582820883020759, |
|
"learning_rate": 0.0001892195968342809, |
|
"loss": 46.0, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.062408248267385884, |
|
"grad_norm": 0.000795324333012104, |
|
"learning_rate": 0.00018917209289403227, |
|
"loss": 46.0, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.06254480898569527, |
|
"grad_norm": 0.001082447823137045, |
|
"learning_rate": 0.00018912449051045527, |
|
"loss": 46.0, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.06268136970400465, |
|
"grad_norm": 0.001177507103420794, |
|
"learning_rate": 0.00018907678973610156, |
|
"loss": 46.0, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.06281793042231403, |
|
"grad_norm": 0.0007584211998619139, |
|
"learning_rate": 0.00018902899062363143, |
|
"loss": 46.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.0629544911406234, |
|
"grad_norm": 0.0005909963510930538, |
|
"learning_rate": 0.00018898109322581356, |
|
"loss": 46.0, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.06309105185893278, |
|
"grad_norm": 0.0007048872066661716, |
|
"learning_rate": 0.0001889330975955253, |
|
"loss": 46.0, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.06322761257724216, |
|
"grad_norm": 0.0005063001881353557, |
|
"learning_rate": 0.0001888850037857524, |
|
"loss": 46.0, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.06336417329555154, |
|
"grad_norm": 0.000993978581391275, |
|
"learning_rate": 0.000188836811849589, |
|
"loss": 46.0, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.06350073401386092, |
|
"grad_norm": 0.0010828451486304402, |
|
"learning_rate": 0.0001887885218402375, |
|
"loss": 46.0, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.0636372947321703, |
|
"grad_norm": 0.0008432026370428503, |
|
"learning_rate": 0.00018874013381100875, |
|
"loss": 46.0, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.06377385545047967, |
|
"grad_norm": 0.0007918166811577976, |
|
"learning_rate": 0.00018869164781532157, |
|
"loss": 46.0, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.06391041616878905, |
|
"grad_norm": 0.0007451958954334259, |
|
"learning_rate": 0.00018864306390670307, |
|
"loss": 46.0, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.06404697688709843, |
|
"grad_norm": 0.0007615243084728718, |
|
"learning_rate": 0.00018859438213878849, |
|
"loss": 46.0, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.0641835376054078, |
|
"grad_norm": 0.00018180711776949465, |
|
"learning_rate": 0.000188545602565321, |
|
"loss": 46.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.06432009832371718, |
|
"grad_norm": 0.0009969191160053015, |
|
"learning_rate": 0.0001884967252401518, |
|
"loss": 46.0, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.06445665904202656, |
|
"grad_norm": 0.0008999738493002951, |
|
"learning_rate": 0.00018844775021724004, |
|
"loss": 46.0, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.06459321976033594, |
|
"grad_norm": 0.0006670115399174392, |
|
"learning_rate": 0.00018839867755065265, |
|
"loss": 46.0, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.06472978047864532, |
|
"grad_norm": 0.0011296794982627034, |
|
"learning_rate": 0.00018834950729456433, |
|
"loss": 46.0, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.0648663411969547, |
|
"grad_norm": 0.0004901388892903924, |
|
"learning_rate": 0.0001883002395032577, |
|
"loss": 46.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.06500290191526407, |
|
"grad_norm": 0.0017039328813552856, |
|
"learning_rate": 0.00018825087423112282, |
|
"loss": 46.0, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.06513946263357345, |
|
"grad_norm": 0.0009402755531482399, |
|
"learning_rate": 0.00018820141153265754, |
|
"loss": 46.0, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.06527602335188283, |
|
"grad_norm": 0.0008238424779847264, |
|
"learning_rate": 0.00018815185146246716, |
|
"loss": 46.0, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.06541258407019221, |
|
"grad_norm": 0.0007881290512159467, |
|
"learning_rate": 0.00018810219407526456, |
|
"loss": 46.0, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.06554914478850159, |
|
"grad_norm": 0.0010467070387676358, |
|
"learning_rate": 0.00018805243942587, |
|
"loss": 46.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.06568570550681097, |
|
"grad_norm": 0.0009283073595725, |
|
"learning_rate": 0.0001880025875692111, |
|
"loss": 46.0, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.06582226622512034, |
|
"grad_norm": 0.000698375515639782, |
|
"learning_rate": 0.00018795263856032288, |
|
"loss": 46.0, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.06595882694342972, |
|
"grad_norm": 0.0015646845567971468, |
|
"learning_rate": 0.00018790259245434748, |
|
"loss": 46.0, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.0660953876617391, |
|
"grad_norm": 0.0008225208730436862, |
|
"learning_rate": 0.00018785244930653438, |
|
"loss": 46.0, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.06623194838004848, |
|
"grad_norm": 0.0006926036439836025, |
|
"learning_rate": 0.00018780220917224006, |
|
"loss": 46.0, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.06636850909835786, |
|
"grad_norm": 0.000991306733340025, |
|
"learning_rate": 0.00018775187210692815, |
|
"loss": 46.0, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.06650506981666723, |
|
"grad_norm": 0.0006662954692728817, |
|
"learning_rate": 0.00018770143816616928, |
|
"loss": 46.0, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.06664163053497661, |
|
"grad_norm": 0.000875332101713866, |
|
"learning_rate": 0.000187650907405641, |
|
"loss": 46.0, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.06677819125328599, |
|
"grad_norm": 0.002434828784316778, |
|
"learning_rate": 0.00018760027988112775, |
|
"loss": 46.0, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.06691475197159537, |
|
"grad_norm": 0.002048447262495756, |
|
"learning_rate": 0.0001875495556485208, |
|
"loss": 46.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.06705131268990475, |
|
"grad_norm": 0.0005600312724709511, |
|
"learning_rate": 0.00018749873476381828, |
|
"loss": 46.0, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.06718787340821412, |
|
"grad_norm": 0.00041424104711040854, |
|
"learning_rate": 0.00018744781728312479, |
|
"loss": 46.0, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.0673244341265235, |
|
"grad_norm": 0.0009647294646129012, |
|
"learning_rate": 0.0001873968032626518, |
|
"loss": 46.0, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.06746099484483288, |
|
"grad_norm": 0.0007067355909384787, |
|
"learning_rate": 0.00018734569275871726, |
|
"loss": 46.0, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.06759755556314226, |
|
"grad_norm": 0.000527632946614176, |
|
"learning_rate": 0.00018729448582774559, |
|
"loss": 46.0, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.06773411628145164, |
|
"grad_norm": 0.000945943349506706, |
|
"learning_rate": 0.00018724318252626775, |
|
"loss": 46.0, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.06787067699976101, |
|
"grad_norm": 0.0016438892344012856, |
|
"learning_rate": 0.00018719178291092106, |
|
"loss": 46.0, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.06800723771807039, |
|
"grad_norm": 0.0018535932758823037, |
|
"learning_rate": 0.00018714028703844914, |
|
"loss": 46.0, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.06814379843637977, |
|
"grad_norm": 0.0012148134410381317, |
|
"learning_rate": 0.00018708869496570192, |
|
"loss": 46.0, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.06828035915468915, |
|
"grad_norm": 0.0008952637435868382, |
|
"learning_rate": 0.00018703700674963547, |
|
"loss": 46.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06841691987299853, |
|
"grad_norm": 0.000657785392832011, |
|
"learning_rate": 0.0001869852224473121, |
|
"loss": 46.0, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.0685534805913079, |
|
"grad_norm": 0.0008411743910983205, |
|
"learning_rate": 0.00018693334211590006, |
|
"loss": 46.0, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.06869004130961728, |
|
"grad_norm": 0.0008810686995275319, |
|
"learning_rate": 0.00018688136581267373, |
|
"loss": 46.0, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.06882660202792666, |
|
"grad_norm": 0.0005853201728314161, |
|
"learning_rate": 0.00018682929359501338, |
|
"loss": 46.0, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.06896316274623604, |
|
"grad_norm": 0.0004718601703643799, |
|
"learning_rate": 0.00018677712552040522, |
|
"loss": 46.0, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.06909972346454542, |
|
"grad_norm": 0.0007315895054489374, |
|
"learning_rate": 0.00018672486164644116, |
|
"loss": 46.0, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.0692362841828548, |
|
"grad_norm": 0.0009641025681048632, |
|
"learning_rate": 0.00018667250203081905, |
|
"loss": 46.0, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.06937284490116417, |
|
"grad_norm": 0.0008396725752390921, |
|
"learning_rate": 0.00018662004673134232, |
|
"loss": 46.0, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.06950940561947355, |
|
"grad_norm": 0.000584542634896934, |
|
"learning_rate": 0.00018656749580592003, |
|
"loss": 46.0, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.06964596633778293, |
|
"grad_norm": 0.0005784454988315701, |
|
"learning_rate": 0.00018651484931256685, |
|
"loss": 46.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.06978252705609231, |
|
"grad_norm": 0.0003768317692447454, |
|
"learning_rate": 0.00018646210730940293, |
|
"loss": 46.0, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.06991908777440169, |
|
"grad_norm": 0.0012037859996780753, |
|
"learning_rate": 0.00018640926985465388, |
|
"loss": 46.0, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.07005564849271108, |
|
"grad_norm": 0.0007435141014866531, |
|
"learning_rate": 0.00018635633700665063, |
|
"loss": 46.0, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.07019220921102046, |
|
"grad_norm": 0.0009005676256492734, |
|
"learning_rate": 0.00018630330882382951, |
|
"loss": 46.0, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.07032876992932983, |
|
"grad_norm": 0.0005016371724195778, |
|
"learning_rate": 0.00018625018536473206, |
|
"loss": 46.0, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.07046533064763921, |
|
"grad_norm": 0.0012827449245378375, |
|
"learning_rate": 0.00018619696668800492, |
|
"loss": 46.0, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.07060189136594859, |
|
"grad_norm": 0.0004301304288674146, |
|
"learning_rate": 0.0001861436528524, |
|
"loss": 46.0, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.07073845208425797, |
|
"grad_norm": 0.00036730722058564425, |
|
"learning_rate": 0.00018609024391677418, |
|
"loss": 46.0, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.07087501280256735, |
|
"grad_norm": 0.0008029394666664302, |
|
"learning_rate": 0.00018603673994008925, |
|
"loss": 46.0, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.07101157352087673, |
|
"grad_norm": 0.0006513711996376514, |
|
"learning_rate": 0.00018598314098141206, |
|
"loss": 46.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.0711481342391861, |
|
"grad_norm": 0.00077268440509215, |
|
"learning_rate": 0.00018592944709991426, |
|
"loss": 46.0, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.07128469495749548, |
|
"grad_norm": 0.0004973417380824685, |
|
"learning_rate": 0.00018587565835487233, |
|
"loss": 46.0, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.07142125567580486, |
|
"grad_norm": 0.000682865793351084, |
|
"learning_rate": 0.00018582177480566735, |
|
"loss": 46.0, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.07155781639411424, |
|
"grad_norm": 0.0006797179230488837, |
|
"learning_rate": 0.00018576779651178522, |
|
"loss": 46.0, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.07169437711242362, |
|
"grad_norm": 0.000731765350792557, |
|
"learning_rate": 0.00018571372353281632, |
|
"loss": 46.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.071830937830733, |
|
"grad_norm": 0.00029318922315724194, |
|
"learning_rate": 0.00018565955592845563, |
|
"loss": 46.0, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.07196749854904237, |
|
"grad_norm": 0.0011708553647622466, |
|
"learning_rate": 0.00018560529375850259, |
|
"loss": 46.0, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.07210405926735175, |
|
"grad_norm": 0.00033764285035431385, |
|
"learning_rate": 0.00018555093708286094, |
|
"loss": 46.0, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.07224061998566113, |
|
"grad_norm": 0.0011742584174498916, |
|
"learning_rate": 0.00018549648596153891, |
|
"loss": 46.0, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.0723771807039705, |
|
"grad_norm": 0.0011071540648117661, |
|
"learning_rate": 0.00018544194045464886, |
|
"loss": 46.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.07251374142227988, |
|
"grad_norm": 0.0004732667875941843, |
|
"learning_rate": 0.00018538730062240744, |
|
"loss": 46.0, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.07265030214058926, |
|
"grad_norm": 0.0006753505440428853, |
|
"learning_rate": 0.00018533256652513536, |
|
"loss": 46.0, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.07278686285889864, |
|
"grad_norm": 0.0013869997346773744, |
|
"learning_rate": 0.00018527773822325742, |
|
"loss": 46.0, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.07292342357720802, |
|
"grad_norm": 0.0004685988824348897, |
|
"learning_rate": 0.0001852228157773025, |
|
"loss": 46.0, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.0730599842955174, |
|
"grad_norm": 0.0005608046194538474, |
|
"learning_rate": 0.00018516779924790324, |
|
"loss": 46.0, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.07319654501382677, |
|
"grad_norm": 0.0007237467798404396, |
|
"learning_rate": 0.00018511268869579635, |
|
"loss": 46.0, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.07333310573213615, |
|
"grad_norm": 0.0007371974061243236, |
|
"learning_rate": 0.00018505748418182213, |
|
"loss": 46.0, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.07346966645044553, |
|
"grad_norm": 0.0021626208908855915, |
|
"learning_rate": 0.0001850021857669248, |
|
"loss": 46.0, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.07360622716875491, |
|
"grad_norm": 0.0044371578842401505, |
|
"learning_rate": 0.0001849467935121521, |
|
"loss": 46.0, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.07374278788706429, |
|
"grad_norm": 0.001232149894349277, |
|
"learning_rate": 0.00018489130747865548, |
|
"loss": 46.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.07387934860537367, |
|
"grad_norm": 0.0007343983743339777, |
|
"learning_rate": 0.00018483572772768982, |
|
"loss": 46.0, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.07401590932368304, |
|
"grad_norm": 0.00047310179797932506, |
|
"learning_rate": 0.00018478005432061352, |
|
"loss": 46.0, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.07415247004199242, |
|
"grad_norm": 0.0008101825369521976, |
|
"learning_rate": 0.00018472428731888837, |
|
"loss": 46.0, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.0742890307603018, |
|
"grad_norm": 0.001507743145339191, |
|
"learning_rate": 0.00018466842678407946, |
|
"loss": 46.0, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.07442559147861118, |
|
"grad_norm": 0.0009909087093546987, |
|
"learning_rate": 0.00018461247277785513, |
|
"loss": 46.0, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.07456215219692056, |
|
"grad_norm": 0.0013652259949594736, |
|
"learning_rate": 0.000184556425361987, |
|
"loss": 46.0, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.07469871291522993, |
|
"grad_norm": 0.0008130900096148252, |
|
"learning_rate": 0.0001845002845983497, |
|
"loss": 46.0, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.07483527363353931, |
|
"grad_norm": 0.0012740385718643665, |
|
"learning_rate": 0.00018444405054892092, |
|
"loss": 46.0, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.07497183435184869, |
|
"grad_norm": 0.000746730831451714, |
|
"learning_rate": 0.0001843877232757815, |
|
"loss": 46.0, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.07510839507015807, |
|
"grad_norm": 0.0019112954614683986, |
|
"learning_rate": 0.0001843313028411149, |
|
"loss": 46.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.07524495578846745, |
|
"grad_norm": 0.0005015329807065427, |
|
"learning_rate": 0.0001842747893072077, |
|
"loss": 46.0, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.07538151650677682, |
|
"grad_norm": 0.0004962061066180468, |
|
"learning_rate": 0.00018421818273644912, |
|
"loss": 46.0, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.0755180772250862, |
|
"grad_norm": 0.00037836559931747615, |
|
"learning_rate": 0.00018416148319133117, |
|
"loss": 46.0, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.07565463794339558, |
|
"grad_norm": 0.0003992395068053156, |
|
"learning_rate": 0.0001841046907344484, |
|
"loss": 46.0, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.07579119866170496, |
|
"grad_norm": 0.0004277300031390041, |
|
"learning_rate": 0.000184047805428498, |
|
"loss": 46.0, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.07592775938001434, |
|
"grad_norm": 0.0005832565948367119, |
|
"learning_rate": 0.00018399082733627965, |
|
"loss": 46.0, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.07606432009832371, |
|
"grad_norm": 0.0006913339020684361, |
|
"learning_rate": 0.00018393375652069548, |
|
"loss": 46.0, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.0762008808166331, |
|
"grad_norm": 0.00086840835865587, |
|
"learning_rate": 0.00018387659304474994, |
|
"loss": 46.0, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.07633744153494247, |
|
"grad_norm": 0.0007002664497122169, |
|
"learning_rate": 0.00018381933697154986, |
|
"loss": 46.0, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.07647400225325185, |
|
"grad_norm": 0.0008870341116562486, |
|
"learning_rate": 0.00018376198836430417, |
|
"loss": 46.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.07661056297156123, |
|
"grad_norm": 0.0006294609629549086, |
|
"learning_rate": 0.00018370454728632404, |
|
"loss": 46.0, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.0767471236898706, |
|
"grad_norm": 0.0004686205938924104, |
|
"learning_rate": 0.00018364701380102266, |
|
"loss": 46.0, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.07688368440817998, |
|
"grad_norm": 0.0008394161704927683, |
|
"learning_rate": 0.0001835893879719154, |
|
"loss": 46.0, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.07702024512648936, |
|
"grad_norm": 0.0006610259879380465, |
|
"learning_rate": 0.00018353166986261935, |
|
"loss": 46.0, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.07715680584479874, |
|
"grad_norm": 0.000717335322406143, |
|
"learning_rate": 0.0001834738595368536, |
|
"loss": 46.0, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.07729336656310812, |
|
"grad_norm": 0.0005708184908144176, |
|
"learning_rate": 0.00018341595705843907, |
|
"loss": 46.0, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.0774299272814175, |
|
"grad_norm": 0.0009451278601773083, |
|
"learning_rate": 0.00018335796249129832, |
|
"loss": 46.0, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.07756648799972687, |
|
"grad_norm": 0.0005919419345445931, |
|
"learning_rate": 0.0001832998758994556, |
|
"loss": 46.0, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.07770304871803625, |
|
"grad_norm": 0.0006907092756591737, |
|
"learning_rate": 0.00018324169734703683, |
|
"loss": 46.0, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.07783960943634563, |
|
"grad_norm": 0.0008807244012132287, |
|
"learning_rate": 0.00018318342689826938, |
|
"loss": 46.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.07797617015465501, |
|
"grad_norm": 0.0006545766955241561, |
|
"learning_rate": 0.00018312506461748207, |
|
"loss": 46.0, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.07811273087296439, |
|
"grad_norm": 0.00029055686900392175, |
|
"learning_rate": 0.0001830666105691051, |
|
"loss": 46.0, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.07824929159127376, |
|
"grad_norm": 0.000587637594435364, |
|
"learning_rate": 0.00018300806481767005, |
|
"loss": 46.0, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.07838585230958314, |
|
"grad_norm": 0.0003420762368477881, |
|
"learning_rate": 0.00018294942742780966, |
|
"loss": 46.0, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.07852241302789252, |
|
"grad_norm": 0.00046790859778411686, |
|
"learning_rate": 0.00018289069846425782, |
|
"loss": 46.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.0786589737462019, |
|
"grad_norm": 0.00026647665072232485, |
|
"learning_rate": 0.00018283187799184958, |
|
"loss": 46.0, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.07879553446451128, |
|
"grad_norm": 0.0015470724320039153, |
|
"learning_rate": 0.000182772966075521, |
|
"loss": 46.0, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.07893209518282066, |
|
"grad_norm": 0.00032806419767439365, |
|
"learning_rate": 0.00018271396278030905, |
|
"loss": 46.0, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.07906865590113003, |
|
"grad_norm": 0.0004820665344595909, |
|
"learning_rate": 0.0001826548681713516, |
|
"loss": 46.0, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.07920521661943941, |
|
"grad_norm": 0.0006422748556360602, |
|
"learning_rate": 0.00018259568231388738, |
|
"loss": 46.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.0793417773377488, |
|
"grad_norm": 0.0002866037539206445, |
|
"learning_rate": 0.00018253640527325578, |
|
"loss": 46.0, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.07947833805605818, |
|
"grad_norm": 0.0005615049158222973, |
|
"learning_rate": 0.00018247703711489686, |
|
"loss": 46.0, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.07961489877436756, |
|
"grad_norm": 0.001207325840368867, |
|
"learning_rate": 0.00018241757790435134, |
|
"loss": 46.0, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.07975145949267694, |
|
"grad_norm": 0.0005774226738139987, |
|
"learning_rate": 0.00018235802770726037, |
|
"loss": 46.0, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.07988802021098632, |
|
"grad_norm": 0.0005512305651791394, |
|
"learning_rate": 0.00018229838658936564, |
|
"loss": 46.0, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.0800245809292957, |
|
"grad_norm": 0.0021268404088914394, |
|
"learning_rate": 0.00018223865461650912, |
|
"loss": 46.0, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.08016114164760507, |
|
"grad_norm": 0.0017917539225891232, |
|
"learning_rate": 0.00018217883185463315, |
|
"loss": 46.0, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.08029770236591445, |
|
"grad_norm": 0.004704636055976152, |
|
"learning_rate": 0.00018211891836978028, |
|
"loss": 46.0, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.08043426308422383, |
|
"grad_norm": 0.0016004083445295691, |
|
"learning_rate": 0.00018205891422809316, |
|
"loss": 46.0, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.0805708238025332, |
|
"grad_norm": 0.0008631657692603767, |
|
"learning_rate": 0.0001819988194958146, |
|
"loss": 46.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.08070738452084258, |
|
"grad_norm": 0.000875883677508682, |
|
"learning_rate": 0.00018193863423928742, |
|
"loss": 46.0, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.08084394523915196, |
|
"grad_norm": 0.0007616846705786884, |
|
"learning_rate": 0.0001818783585249543, |
|
"loss": 46.0, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.08098050595746134, |
|
"grad_norm": 0.0017507924931123853, |
|
"learning_rate": 0.00018181799241935787, |
|
"loss": 46.0, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.08111706667577072, |
|
"grad_norm": 0.0015062005259096622, |
|
"learning_rate": 0.00018175753598914047, |
|
"loss": 46.0, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.0812536273940801, |
|
"grad_norm": 0.0009979240130633116, |
|
"learning_rate": 0.0001816969893010442, |
|
"loss": 46.0, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.08139018811238947, |
|
"grad_norm": 0.0014799052150920033, |
|
"learning_rate": 0.00018163635242191085, |
|
"loss": 46.0, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.08152674883069885, |
|
"grad_norm": 0.002407307270914316, |
|
"learning_rate": 0.00018157562541868164, |
|
"loss": 46.0, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.08166330954900823, |
|
"grad_norm": 0.001063771778717637, |
|
"learning_rate": 0.00018151480835839741, |
|
"loss": 46.0, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.08179987026731761, |
|
"grad_norm": 0.00132046639919281, |
|
"learning_rate": 0.0001814539013081984, |
|
"loss": 46.0, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.08193643098562699, |
|
"grad_norm": 0.001031374093145132, |
|
"learning_rate": 0.00018139290433532416, |
|
"loss": 46.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.08193643098562699, |
|
"eval_loss": 11.5, |
|
"eval_runtime": 20.7447, |
|
"eval_samples_per_second": 148.665, |
|
"eval_steps_per_second": 74.332, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.08207299170393637, |
|
"grad_norm": 0.0006209348211996257, |
|
"learning_rate": 0.00018133181750711348, |
|
"loss": 46.0, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.08220955242224574, |
|
"grad_norm": 0.0009648207924328744, |
|
"learning_rate": 0.00018127064089100447, |
|
"loss": 46.0, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.08234611314055512, |
|
"grad_norm": 0.0010155495256185532, |
|
"learning_rate": 0.00018120937455453424, |
|
"loss": 46.0, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.0824826738588645, |
|
"grad_norm": 0.0008265697979368269, |
|
"learning_rate": 0.000181148018565339, |
|
"loss": 46.0, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.08261923457717388, |
|
"grad_norm": 0.0006557560409419239, |
|
"learning_rate": 0.00018108657299115396, |
|
"loss": 46.0, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.08275579529548326, |
|
"grad_norm": 0.0003027521597687155, |
|
"learning_rate": 0.0001810250378998132, |
|
"loss": 46.0, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.08289235601379263, |
|
"grad_norm": 0.00038604583824053407, |
|
"learning_rate": 0.00018096341335924955, |
|
"loss": 46.0, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.08302891673210201, |
|
"grad_norm": 0.0011505828006193042, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 46.0, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.08316547745041139, |
|
"grad_norm": 0.00021457420371007174, |
|
"learning_rate": 0.00018083989620267907, |
|
"loss": 46.0, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.08330203816872077, |
|
"grad_norm": 0.000474292115541175, |
|
"learning_rate": 0.0001807780037230315, |
|
"loss": 46.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.08343859888703015, |
|
"grad_norm": 0.00048646898358128965, |
|
"learning_rate": 0.00018071602206687942, |
|
"loss": 46.0, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.08357515960533952, |
|
"grad_norm": 0.0005354878958314657, |
|
"learning_rate": 0.00018065395130264876, |
|
"loss": 46.0, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.0837117203236489, |
|
"grad_norm": 0.00048499630065634847, |
|
"learning_rate": 0.00018059179149886378, |
|
"loss": 46.0, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.08384828104195828, |
|
"grad_norm": 0.0008654086268506944, |
|
"learning_rate": 0.00018052954272414706, |
|
"loss": 46.0, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.08398484176026766, |
|
"grad_norm": 0.0005253584822639823, |
|
"learning_rate": 0.0001804672050472194, |
|
"loss": 46.0, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.08412140247857704, |
|
"grad_norm": 0.0017661650199443102, |
|
"learning_rate": 0.0001804047785368997, |
|
"loss": 46.0, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.08425796319688642, |
|
"grad_norm": 0.0003864025929942727, |
|
"learning_rate": 0.00018034226326210496, |
|
"loss": 46.0, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.0843945239151958, |
|
"grad_norm": 0.000856466474942863, |
|
"learning_rate": 0.00018027965929185024, |
|
"loss": 46.0, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.08453108463350517, |
|
"grad_norm": 0.0004072580486536026, |
|
"learning_rate": 0.00018021696669524842, |
|
"loss": 46.0, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.08466764535181455, |
|
"grad_norm": 0.0010099663631990552, |
|
"learning_rate": 0.0001801541855415102, |
|
"loss": 46.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.08480420607012393, |
|
"grad_norm": 0.001016309019178152, |
|
"learning_rate": 0.00018009131589994418, |
|
"loss": 46.0, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.0849407667884333, |
|
"grad_norm": 0.001154162921011448, |
|
"learning_rate": 0.00018002835783995652, |
|
"loss": 46.0, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.08507732750674268, |
|
"grad_norm": 0.0006390147027559578, |
|
"learning_rate": 0.00017996531143105106, |
|
"loss": 46.0, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.08521388822505206, |
|
"grad_norm": 0.0012196438619866967, |
|
"learning_rate": 0.00017990217674282915, |
|
"loss": 46.0, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.08535044894336144, |
|
"grad_norm": 0.0003661528753582388, |
|
"learning_rate": 0.0001798389538449896, |
|
"loss": 46.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.08548700966167082, |
|
"grad_norm": 0.0006461184821091592, |
|
"learning_rate": 0.0001797756428073286, |
|
"loss": 46.0, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.0856235703799802, |
|
"grad_norm": 0.0010678149992600083, |
|
"learning_rate": 0.00017971224369973964, |
|
"loss": 46.0, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.08576013109828957, |
|
"grad_norm": 0.0006297577056102455, |
|
"learning_rate": 0.00017964875659221344, |
|
"loss": 46.0, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.08589669181659895, |
|
"grad_norm": 0.000396767514757812, |
|
"learning_rate": 0.00017958518155483788, |
|
"loss": 46.0, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.08603325253490833, |
|
"grad_norm": 0.0011105970479547977, |
|
"learning_rate": 0.00017952151865779792, |
|
"loss": 46.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.08616981325321771, |
|
"grad_norm": 0.0012717852368950844, |
|
"learning_rate": 0.00017945776797137543, |
|
"loss": 46.0, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.08630637397152709, |
|
"grad_norm": 0.0007417348097078502, |
|
"learning_rate": 0.00017939392956594933, |
|
"loss": 46.0, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.08644293468983646, |
|
"grad_norm": 0.00036522196023724973, |
|
"learning_rate": 0.00017933000351199533, |
|
"loss": 46.0, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.08657949540814584, |
|
"grad_norm": 0.0013521965593099594, |
|
"learning_rate": 0.00017926598988008582, |
|
"loss": 46.0, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.08671605612645522, |
|
"grad_norm": 0.0007052735309116542, |
|
"learning_rate": 0.00017920188874089, |
|
"loss": 46.0, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.0868526168447646, |
|
"grad_norm": 0.0010762631427496672, |
|
"learning_rate": 0.00017913770016517354, |
|
"loss": 46.0, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.08698917756307398, |
|
"grad_norm": 0.002226311946287751, |
|
"learning_rate": 0.0001790734242237988, |
|
"loss": 46.0, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.08712573828138336, |
|
"grad_norm": 0.004666191525757313, |
|
"learning_rate": 0.00017900906098772444, |
|
"loss": 46.0, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.08726229899969273, |
|
"grad_norm": 0.0010003356728702784, |
|
"learning_rate": 0.00017894461052800557, |
|
"loss": 46.0, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.08739885971800211, |
|
"grad_norm": 0.0011451984755694866, |
|
"learning_rate": 0.00017888007291579357, |
|
"loss": 46.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.08753542043631149, |
|
"grad_norm": 0.0011838976060971618, |
|
"learning_rate": 0.000178815448222336, |
|
"loss": 46.0, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.08767198115462087, |
|
"grad_norm": 0.0011408502468839288, |
|
"learning_rate": 0.0001787507365189767, |
|
"loss": 46.0, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.08780854187293025, |
|
"grad_norm": 0.0007302387966774404, |
|
"learning_rate": 0.00017868593787715535, |
|
"loss": 46.0, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.08794510259123962, |
|
"grad_norm": 0.0010645872680470347, |
|
"learning_rate": 0.00017862105236840777, |
|
"loss": 46.0, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.088081663309549, |
|
"grad_norm": 0.0014955222140997648, |
|
"learning_rate": 0.00017855608006436558, |
|
"loss": 46.0, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.08821822402785838, |
|
"grad_norm": 0.0011010526213794947, |
|
"learning_rate": 0.0001784910210367563, |
|
"loss": 46.0, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.08835478474616776, |
|
"grad_norm": 0.0005369166028685868, |
|
"learning_rate": 0.00017842587535740314, |
|
"loss": 46.0, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.08849134546447714, |
|
"grad_norm": 0.0007034659502096474, |
|
"learning_rate": 0.00017836064309822503, |
|
"loss": 46.0, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.08862790618278653, |
|
"grad_norm": 0.0008996776887215674, |
|
"learning_rate": 0.00017829532433123635, |
|
"loss": 46.0, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.0887644669010959, |
|
"grad_norm": 0.001349107245914638, |
|
"learning_rate": 0.00017822991912854713, |
|
"loss": 46.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.08890102761940528, |
|
"grad_norm": 0.00062900735065341, |
|
"learning_rate": 0.00017816442756236276, |
|
"loss": 46.0, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.08903758833771466, |
|
"grad_norm": 0.000961163139436394, |
|
"learning_rate": 0.00017809884970498396, |
|
"loss": 46.0, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.08917414905602404, |
|
"grad_norm": 0.0008951184572651982, |
|
"learning_rate": 0.00017803318562880673, |
|
"loss": 46.0, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.08931070977433342, |
|
"grad_norm": 0.000987289589829743, |
|
"learning_rate": 0.00017796743540632223, |
|
"loss": 46.0, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.0894472704926428, |
|
"grad_norm": 0.0005511721828952432, |
|
"learning_rate": 0.0001779015991101168, |
|
"loss": 46.0, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.08958383121095218, |
|
"grad_norm": 0.0008138107368722558, |
|
"learning_rate": 0.00017783567681287166, |
|
"loss": 46.0, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.08972039192926155, |
|
"grad_norm": 0.0005222151521593332, |
|
"learning_rate": 0.00017776966858736314, |
|
"loss": 46.0, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.08985695264757093, |
|
"grad_norm": 0.0005912959459237754, |
|
"learning_rate": 0.00017770357450646232, |
|
"loss": 46.0, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.08999351336588031, |
|
"grad_norm": 0.00042330275755375624, |
|
"learning_rate": 0.00017763739464313506, |
|
"loss": 46.0, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.09013007408418969, |
|
"grad_norm": 0.0006147885578684509, |
|
"learning_rate": 0.000177571129070442, |
|
"loss": 46.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.09026663480249907, |
|
"grad_norm": 0.0013564558466896415, |
|
"learning_rate": 0.00017750477786153832, |
|
"loss": 46.0, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.09040319552080844, |
|
"grad_norm": 0.0008140000863932073, |
|
"learning_rate": 0.0001774383410896738, |
|
"loss": 46.0, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.09053975623911782, |
|
"grad_norm": 0.0005956863751634955, |
|
"learning_rate": 0.00017737181882819262, |
|
"loss": 46.0, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.0906763169574272, |
|
"grad_norm": 0.0005575847462750971, |
|
"learning_rate": 0.0001773052111505334, |
|
"loss": 46.0, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.09081287767573658, |
|
"grad_norm": 0.00038412457797676325, |
|
"learning_rate": 0.00017723851813022901, |
|
"loss": 46.0, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.09094943839404596, |
|
"grad_norm": 0.0009578867466188967, |
|
"learning_rate": 0.00017717173984090658, |
|
"loss": 46.0, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.09108599911235533, |
|
"grad_norm": 0.0004775691486429423, |
|
"learning_rate": 0.00017710487635628735, |
|
"loss": 46.0, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.09122255983066471, |
|
"grad_norm": 0.0006319622043520212, |
|
"learning_rate": 0.00017703792775018655, |
|
"loss": 46.0, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.09135912054897409, |
|
"grad_norm": 0.0006505327764898539, |
|
"learning_rate": 0.0001769708940965135, |
|
"loss": 46.0, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.09149568126728347, |
|
"grad_norm": 0.00035890881554223597, |
|
"learning_rate": 0.00017690377546927133, |
|
"loss": 46.0, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.09163224198559285, |
|
"grad_norm": 0.0005791863077320158, |
|
"learning_rate": 0.000176836571942557, |
|
"loss": 46.0, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.09176880270390222, |
|
"grad_norm": 0.0005439603119157255, |
|
"learning_rate": 0.00017676928359056123, |
|
"loss": 46.0, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.0919053634222116, |
|
"grad_norm": 0.0006497172289527953, |
|
"learning_rate": 0.0001767019104875683, |
|
"loss": 46.0, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.09204192414052098, |
|
"grad_norm": 0.0004997221985831857, |
|
"learning_rate": 0.00017663445270795611, |
|
"loss": 46.0, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.09217848485883036, |
|
"grad_norm": 0.0010348627110943198, |
|
"learning_rate": 0.0001765669103261961, |
|
"loss": 46.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.09231504557713974, |
|
"grad_norm": 0.0006982755148783326, |
|
"learning_rate": 0.00017649928341685297, |
|
"loss": 46.0, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.09245160629544912, |
|
"grad_norm": 0.0008973072981461883, |
|
"learning_rate": 0.00017643157205458483, |
|
"loss": 46.0, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.0925881670137585, |
|
"grad_norm": 0.0009211792494170368, |
|
"learning_rate": 0.00017636377631414303, |
|
"loss": 46.0, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.09272472773206787, |
|
"grad_norm": 0.0013491098070517182, |
|
"learning_rate": 0.00017629589627037203, |
|
"loss": 46.0, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.09286128845037725, |
|
"grad_norm": 0.0011835723416879773, |
|
"learning_rate": 0.00017622793199820934, |
|
"loss": 46.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.09299784916868663, |
|
"grad_norm": 0.0003937036672141403, |
|
"learning_rate": 0.0001761598835726855, |
|
"loss": 46.0, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.093134409886996, |
|
"grad_norm": 0.00029390319832600653, |
|
"learning_rate": 0.00017609175106892395, |
|
"loss": 46.0, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.09327097060530538, |
|
"grad_norm": 0.0005982140428386629, |
|
"learning_rate": 0.00017602353456214095, |
|
"loss": 46.0, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.09340753132361476, |
|
"grad_norm": 0.0007088780985213816, |
|
"learning_rate": 0.00017595523412764549, |
|
"loss": 46.0, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.09354409204192414, |
|
"grad_norm": 0.000541395100299269, |
|
"learning_rate": 0.0001758868498408392, |
|
"loss": 46.0, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.09368065276023352, |
|
"grad_norm": 0.0010750001529231668, |
|
"learning_rate": 0.0001758183817772163, |
|
"loss": 46.0, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.0938172134785429, |
|
"grad_norm": 0.0009917879942804575, |
|
"learning_rate": 0.00017574983001236345, |
|
"loss": 46.0, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.09395377419685227, |
|
"grad_norm": 0.0007594460621476173, |
|
"learning_rate": 0.00017568119462195978, |
|
"loss": 46.0, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.09409033491516165, |
|
"grad_norm": 0.0013289398048073053, |
|
"learning_rate": 0.00017561247568177672, |
|
"loss": 46.0, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.09422689563347103, |
|
"grad_norm": 0.002443774603307247, |
|
"learning_rate": 0.00017554367326767792, |
|
"loss": 46.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.09436345635178041, |
|
"grad_norm": 0.000834045116789639, |
|
"learning_rate": 0.0001754747874556192, |
|
"loss": 46.0, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.09450001707008979, |
|
"grad_norm": 0.0008974446100182831, |
|
"learning_rate": 0.0001754058183216484, |
|
"loss": 46.0, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.09463657778839916, |
|
"grad_norm": 0.0006684563704766333, |
|
"learning_rate": 0.00017533676594190544, |
|
"loss": 46.0, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.09477313850670854, |
|
"grad_norm": 0.0005444415146484971, |
|
"learning_rate": 0.00017526763039262206, |
|
"loss": 46.0, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.09490969922501792, |
|
"grad_norm": 0.00041295934352092445, |
|
"learning_rate": 0.00017519841175012184, |
|
"loss": 46.0, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.0950462599433273, |
|
"grad_norm": 0.0014025474665686488, |
|
"learning_rate": 0.0001751291100908201, |
|
"loss": 46.0, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.09518282066163668, |
|
"grad_norm": 0.0016710846684873104, |
|
"learning_rate": 0.0001750597254912238, |
|
"loss": 46.0, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.09531938137994606, |
|
"grad_norm": 0.002275792183354497, |
|
"learning_rate": 0.00017499025802793146, |
|
"loss": 46.0, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.09545594209825543, |
|
"grad_norm": 0.001069681253284216, |
|
"learning_rate": 0.0001749207077776331, |
|
"loss": 46.0, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.09559250281656481, |
|
"grad_norm": 0.0005686444346792996, |
|
"learning_rate": 0.00017485107481711012, |
|
"loss": 46.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.09572906353487419, |
|
"grad_norm": 0.0004446564707905054, |
|
"learning_rate": 0.00017478135922323522, |
|
"loss": 46.0, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.09586562425318357, |
|
"grad_norm": 0.0005210356903262436, |
|
"learning_rate": 0.00017471156107297232, |
|
"loss": 46.0, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.09600218497149295, |
|
"grad_norm": 0.0006958620506338775, |
|
"learning_rate": 0.00017464168044337654, |
|
"loss": 46.0, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.09613874568980232, |
|
"grad_norm": 0.000465748249553144, |
|
"learning_rate": 0.00017457171741159394, |
|
"loss": 46.0, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.0962753064081117, |
|
"grad_norm": 0.0003542457125149667, |
|
"learning_rate": 0.0001745016720548617, |
|
"loss": 46.0, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.09641186712642108, |
|
"grad_norm": 0.0006130662513896823, |
|
"learning_rate": 0.00017443154445050775, |
|
"loss": 46.0, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.09654842784473046, |
|
"grad_norm": 0.0005357970949262381, |
|
"learning_rate": 0.00017436133467595087, |
|
"loss": 46.0, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.09668498856303984, |
|
"grad_norm": 0.0003605932288337499, |
|
"learning_rate": 0.00017429104280870057, |
|
"loss": 46.0, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.09682154928134921, |
|
"grad_norm": 0.0004822358023375273, |
|
"learning_rate": 0.00017422066892635696, |
|
"loss": 46.0, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.09695810999965859, |
|
"grad_norm": 0.000799324014224112, |
|
"learning_rate": 0.0001741502131066107, |
|
"loss": 46.0, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.09709467071796797, |
|
"grad_norm": 0.0005530801718123257, |
|
"learning_rate": 0.00017407967542724297, |
|
"loss": 46.0, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.09723123143627735, |
|
"grad_norm": 0.0008446394931524992, |
|
"learning_rate": 0.0001740090559661252, |
|
"loss": 46.0, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.09736779215458673, |
|
"grad_norm": 0.0004706636827904731, |
|
"learning_rate": 0.0001739383548012192, |
|
"loss": 46.0, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.0975043528728961, |
|
"grad_norm": 0.0003858576819766313, |
|
"learning_rate": 0.00017386757201057692, |
|
"loss": 46.0, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.09764091359120548, |
|
"grad_norm": 0.0006433392409235239, |
|
"learning_rate": 0.00017379670767234045, |
|
"loss": 46.0, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.09777747430951486, |
|
"grad_norm": 0.00046265096170827746, |
|
"learning_rate": 0.0001737257618647419, |
|
"loss": 46.0, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.09791403502782425, |
|
"grad_norm": 0.0004183817654848099, |
|
"learning_rate": 0.00017365473466610337, |
|
"loss": 46.0, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.09805059574613363, |
|
"grad_norm": 0.0005469456664286554, |
|
"learning_rate": 0.0001735836261548367, |
|
"loss": 46.0, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.09818715646444301, |
|
"grad_norm": 0.000383463193429634, |
|
"learning_rate": 0.00017351243640944362, |
|
"loss": 46.0, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.09832371718275239, |
|
"grad_norm": 0.0005470316973514855, |
|
"learning_rate": 0.00017344116550851543, |
|
"loss": 46.0, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.09846027790106177, |
|
"grad_norm": 0.00037759976112283766, |
|
"learning_rate": 0.00017336981353073314, |
|
"loss": 46.0, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.09859683861937114, |
|
"grad_norm": 0.0006073216791264713, |
|
"learning_rate": 0.00017329838055486717, |
|
"loss": 46.0, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.09873339933768052, |
|
"grad_norm": 0.0004972846945747733, |
|
"learning_rate": 0.00017322686665977737, |
|
"loss": 46.0, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.0988699600559899, |
|
"grad_norm": 0.0004591084725689143, |
|
"learning_rate": 0.00017315527192441298, |
|
"loss": 46.0, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.09900652077429928, |
|
"grad_norm": 0.0008169691427610815, |
|
"learning_rate": 0.00017308359642781242, |
|
"loss": 46.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.09914308149260866, |
|
"grad_norm": 0.0005359476199373603, |
|
"learning_rate": 0.00017301184024910333, |
|
"loss": 46.0, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.09927964221091803, |
|
"grad_norm": 0.0005213018739596009, |
|
"learning_rate": 0.0001729400034675024, |
|
"loss": 46.0, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.09941620292922741, |
|
"grad_norm": 0.0006647670525126159, |
|
"learning_rate": 0.00017286808616231522, |
|
"loss": 46.0, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.09955276364753679, |
|
"grad_norm": 0.0007033472065813839, |
|
"learning_rate": 0.00017279608841293639, |
|
"loss": 46.0, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.09968932436584617, |
|
"grad_norm": 0.0009473874815739691, |
|
"learning_rate": 0.00017272401029884933, |
|
"loss": 46.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.09982588508415555, |
|
"grad_norm": 0.0005871194880455732, |
|
"learning_rate": 0.00017265185189962608, |
|
"loss": 46.0, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.09996244580246492, |
|
"grad_norm": 0.00048681360203772783, |
|
"learning_rate": 0.00017257961329492728, |
|
"loss": 46.0, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.1000990065207743, |
|
"grad_norm": 0.0005745171220041811, |
|
"learning_rate": 0.00017250729456450234, |
|
"loss": 46.0, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.10023556723908368, |
|
"grad_norm": 0.00050855748122558, |
|
"learning_rate": 0.0001724348957881889, |
|
"loss": 46.0, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.10037212795739306, |
|
"grad_norm": 0.00042842066613957286, |
|
"learning_rate": 0.00017236241704591304, |
|
"loss": 46.0, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.10050868867570244, |
|
"grad_norm": 0.0010201798286288977, |
|
"learning_rate": 0.00017228985841768915, |
|
"loss": 46.0, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.10064524939401182, |
|
"grad_norm": 0.0005850894376635551, |
|
"learning_rate": 0.00017221721998361976, |
|
"loss": 46.0, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.1007818101123212, |
|
"grad_norm": 0.0009064356563612819, |
|
"learning_rate": 0.00017214450182389559, |
|
"loss": 46.0, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.10091837083063057, |
|
"grad_norm": 0.0015590882394462824, |
|
"learning_rate": 0.00017207170401879526, |
|
"loss": 46.0, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.10105493154893995, |
|
"grad_norm": 0.0011711895931512117, |
|
"learning_rate": 0.00017199882664868538, |
|
"loss": 46.0, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.10119149226724933, |
|
"grad_norm": 0.0029380624182522297, |
|
"learning_rate": 0.00017192586979402044, |
|
"loss": 46.0, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.1013280529855587, |
|
"grad_norm": 0.0007488722330890596, |
|
"learning_rate": 0.0001718528335353426, |
|
"loss": 46.0, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.10146461370386808, |
|
"grad_norm": 0.0006716151256114244, |
|
"learning_rate": 0.00017177971795328167, |
|
"loss": 46.0, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.10160117442217746, |
|
"grad_norm": 0.0008320304332301021, |
|
"learning_rate": 0.00017170652312855514, |
|
"loss": 46.0, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.10173773514048684, |
|
"grad_norm": 0.002169104292988777, |
|
"learning_rate": 0.00017163324914196783, |
|
"loss": 46.0, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.10187429585879622, |
|
"grad_norm": 0.0008557327091693878, |
|
"learning_rate": 0.00017155989607441213, |
|
"loss": 46.0, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.1020108565771056, |
|
"grad_norm": 0.0009351296466775239, |
|
"learning_rate": 0.00017148646400686753, |
|
"loss": 46.0, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.10214741729541497, |
|
"grad_norm": 0.0019438541494309902, |
|
"learning_rate": 0.00017141295302040095, |
|
"loss": 46.0, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.10228397801372435, |
|
"grad_norm": 0.0014289816608652472, |
|
"learning_rate": 0.00017133936319616626, |
|
"loss": 46.0, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.10242053873203373, |
|
"grad_norm": 0.003017352195456624, |
|
"learning_rate": 0.00017126569461540443, |
|
"loss": 46.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.10255709945034311, |
|
"grad_norm": 0.0006331245531328022, |
|
"learning_rate": 0.00017119194735944337, |
|
"loss": 46.0, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.10269366016865249, |
|
"grad_norm": 0.0009659952484071255, |
|
"learning_rate": 0.0001711181215096979, |
|
"loss": 46.0, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.10283022088696187, |
|
"grad_norm": 0.0003564142098184675, |
|
"learning_rate": 0.00017104421714766947, |
|
"loss": 46.0, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.10296678160527124, |
|
"grad_norm": 0.0004242552968207747, |
|
"learning_rate": 0.00017097023435494636, |
|
"loss": 46.0, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.10310334232358062, |
|
"grad_norm": 0.00037972754216752946, |
|
"learning_rate": 0.00017089617321320335, |
|
"loss": 46.0, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.10323990304189, |
|
"grad_norm": 0.00022272802016232163, |
|
"learning_rate": 0.0001708220338042017, |
|
"loss": 46.0, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.10337646376019938, |
|
"grad_norm": 0.0004986607236787677, |
|
"learning_rate": 0.0001707478162097891, |
|
"loss": 46.0, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.10351302447850876, |
|
"grad_norm": 0.0006160014308989048, |
|
"learning_rate": 0.00017067352051189965, |
|
"loss": 46.0, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.10364958519681813, |
|
"grad_norm": 0.0006992130074650049, |
|
"learning_rate": 0.0001705991467925535, |
|
"loss": 46.0, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.10378614591512751, |
|
"grad_norm": 0.0003626852994784713, |
|
"learning_rate": 0.000170524695133857, |
|
"loss": 46.0, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.10392270663343689, |
|
"grad_norm": 0.0008579789428040385, |
|
"learning_rate": 0.0001704501656180026, |
|
"loss": 46.0, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.10405926735174627, |
|
"grad_norm": 0.0009895421098917723, |
|
"learning_rate": 0.00017037555832726865, |
|
"loss": 46.0, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.10419582807005565, |
|
"grad_norm": 0.0005099592381156981, |
|
"learning_rate": 0.00017030087334401936, |
|
"loss": 46.0, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.10433238878836502, |
|
"grad_norm": 0.0006343593122437596, |
|
"learning_rate": 0.00017022611075070474, |
|
"loss": 46.0, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.1044689495066744, |
|
"grad_norm": 0.0002918439276982099, |
|
"learning_rate": 0.00017015127062986043, |
|
"loss": 46.0, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.10460551022498378, |
|
"grad_norm": 0.00021448293409775943, |
|
"learning_rate": 0.00017007635306410775, |
|
"loss": 46.0, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.10474207094329316, |
|
"grad_norm": 0.0003549446410033852, |
|
"learning_rate": 0.00017000135813615338, |
|
"loss": 46.0, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.10487863166160254, |
|
"grad_norm": 0.0008276899461634457, |
|
"learning_rate": 0.00016992628592878956, |
|
"loss": 46.0, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.10501519237991191, |
|
"grad_norm": 0.0006797234527766705, |
|
"learning_rate": 0.00016985113652489374, |
|
"loss": 46.0, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.10515175309822129, |
|
"grad_norm": 0.0012585432268679142, |
|
"learning_rate": 0.00016977591000742854, |
|
"loss": 46.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.10528831381653067, |
|
"grad_norm": 0.0007930306601338089, |
|
"learning_rate": 0.0001697006064594419, |
|
"loss": 46.0, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.10542487453484005, |
|
"grad_norm": 0.0008263712516054511, |
|
"learning_rate": 0.00016962522596406662, |
|
"loss": 46.0, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.10556143525314943, |
|
"grad_norm": 0.0004895622842013836, |
|
"learning_rate": 0.00016954976860452054, |
|
"loss": 46.0, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.1056979959714588, |
|
"grad_norm": 0.000630986993201077, |
|
"learning_rate": 0.00016947423446410636, |
|
"loss": 46.0, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.10583455668976818, |
|
"grad_norm": 0.0006404675077646971, |
|
"learning_rate": 0.00016939862362621146, |
|
"loss": 46.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.10597111740807756, |
|
"grad_norm": 0.0023967279121279716, |
|
"learning_rate": 0.00016932293617430796, |
|
"loss": 46.0, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.10610767812638694, |
|
"grad_norm": 0.0005658793961629272, |
|
"learning_rate": 0.0001692471721919526, |
|
"loss": 46.0, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.10624423884469632, |
|
"grad_norm": 0.0009352597990073264, |
|
"learning_rate": 0.00016917133176278648, |
|
"loss": 46.0, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.1063807995630057, |
|
"grad_norm": 0.0011438351357355714, |
|
"learning_rate": 0.00016909541497053522, |
|
"loss": 46.0, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.10651736028131507, |
|
"grad_norm": 0.0010559734655544162, |
|
"learning_rate": 0.00016901942189900867, |
|
"loss": 46.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.10665392099962445, |
|
"grad_norm": 0.0005015085334889591, |
|
"learning_rate": 0.0001689433526321009, |
|
"loss": 46.0, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.10679048171793383, |
|
"grad_norm": 0.0003511472314130515, |
|
"learning_rate": 0.0001688672072537902, |
|
"loss": 46.0, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.10692704243624321, |
|
"grad_norm": 0.000647745851892978, |
|
"learning_rate": 0.00016879098584813865, |
|
"loss": 46.0, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.10706360315455259, |
|
"grad_norm": 0.0014171921648085117, |
|
"learning_rate": 0.00016871468849929253, |
|
"loss": 46.0, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.10720016387286196, |
|
"grad_norm": 0.0005998522392474115, |
|
"learning_rate": 0.00016863831529148176, |
|
"loss": 46.0, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.10733672459117136, |
|
"grad_norm": 0.0008481833501718938, |
|
"learning_rate": 0.00016856186630902014, |
|
"loss": 46.0, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.10747328530948073, |
|
"grad_norm": 0.001625437056645751, |
|
"learning_rate": 0.00016848534163630498, |
|
"loss": 46.0, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.10760984602779011, |
|
"grad_norm": 0.004598119296133518, |
|
"learning_rate": 0.0001684087413578173, |
|
"loss": 46.0, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.10774640674609949, |
|
"grad_norm": 0.0032096696086227894, |
|
"learning_rate": 0.00016833206555812153, |
|
"loss": 46.0, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.10788296746440887, |
|
"grad_norm": 0.00042951159412041306, |
|
"learning_rate": 0.00016825531432186543, |
|
"loss": 46.0, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.10801952818271825, |
|
"grad_norm": 0.0017844205722212791, |
|
"learning_rate": 0.00016817848773378007, |
|
"loss": 46.0, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.10815608890102762, |
|
"grad_norm": 0.0006350985495373607, |
|
"learning_rate": 0.00016810158587867973, |
|
"loss": 46.0, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.108292649619337, |
|
"grad_norm": 0.0009360946132801473, |
|
"learning_rate": 0.00016802460884146175, |
|
"loss": 46.0, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.10842921033764638, |
|
"grad_norm": 0.00030124749173410237, |
|
"learning_rate": 0.0001679475567071065, |
|
"loss": 46.0, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.10856577105595576, |
|
"grad_norm": 0.00164910894818604, |
|
"learning_rate": 0.0001678704295606772, |
|
"loss": 46.0, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.10870233177426514, |
|
"grad_norm": 0.0012204207014292479, |
|
"learning_rate": 0.00016779322748731995, |
|
"loss": 46.0, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.10883889249257452, |
|
"grad_norm": 0.0007130260928533971, |
|
"learning_rate": 0.0001677159505722635, |
|
"loss": 46.0, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.1089754532108839, |
|
"grad_norm": 0.0006258345092646778, |
|
"learning_rate": 0.0001676385989008193, |
|
"loss": 46.0, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.10911201392919327, |
|
"grad_norm": 0.0013600183883681893, |
|
"learning_rate": 0.00016756117255838128, |
|
"loss": 46.0, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.10924857464750265, |
|
"grad_norm": 0.001245712861418724, |
|
"learning_rate": 0.00016748367163042576, |
|
"loss": 46.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.10938513536581203, |
|
"grad_norm": 0.00043424879550002515, |
|
"learning_rate": 0.0001674060962025115, |
|
"loss": 46.0, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.1095216960841214, |
|
"grad_norm": 0.00040041119791567326, |
|
"learning_rate": 0.00016732844636027948, |
|
"loss": 46.0, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.10965825680243078, |
|
"grad_norm": 0.00047825041110627353, |
|
"learning_rate": 0.00016725072218945272, |
|
"loss": 46.0, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.10979481752074016, |
|
"grad_norm": 0.0006943688495084643, |
|
"learning_rate": 0.00016717292377583647, |
|
"loss": 46.0, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.10993137823904954, |
|
"grad_norm": 0.00046805053716525435, |
|
"learning_rate": 0.00016709505120531782, |
|
"loss": 46.0, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.11006793895735892, |
|
"grad_norm": 0.001021806849166751, |
|
"learning_rate": 0.00016701710456386572, |
|
"loss": 46.0, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.1102044996756683, |
|
"grad_norm": 0.000839448010083288, |
|
"learning_rate": 0.000166939083937531, |
|
"loss": 46.0, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.11034106039397767, |
|
"grad_norm": 0.0007610208704136312, |
|
"learning_rate": 0.0001668609894124461, |
|
"loss": 46.0, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.11047762111228705, |
|
"grad_norm": 0.0008387729176320136, |
|
"learning_rate": 0.00016678282107482502, |
|
"loss": 46.0, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.11061418183059643, |
|
"grad_norm": 0.0005413633771240711, |
|
"learning_rate": 0.00016670457901096328, |
|
"loss": 46.0, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.11075074254890581, |
|
"grad_norm": 0.0008596319821663201, |
|
"learning_rate": 0.0001666262633072378, |
|
"loss": 46.0, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.11088730326721519, |
|
"grad_norm": 0.0005809023859910667, |
|
"learning_rate": 0.0001665478740501067, |
|
"loss": 46.0, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.11102386398552457, |
|
"grad_norm": 0.0005002027610316873, |
|
"learning_rate": 0.00016646941132610947, |
|
"loss": 46.0, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.11116042470383394, |
|
"grad_norm": 0.0005074172513559461, |
|
"learning_rate": 0.0001663908752218666, |
|
"loss": 46.0, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.11129698542214332, |
|
"grad_norm": 0.00038702471647411585, |
|
"learning_rate": 0.00016631226582407952, |
|
"loss": 46.0, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.1114335461404527, |
|
"grad_norm": 0.0005355363246053457, |
|
"learning_rate": 0.00016623358321953078, |
|
"loss": 46.0, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.11157010685876208, |
|
"grad_norm": 0.000400405318941921, |
|
"learning_rate": 0.00016615482749508356, |
|
"loss": 46.0, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.11170666757707146, |
|
"grad_norm": 0.0003890878870151937, |
|
"learning_rate": 0.00016607599873768182, |
|
"loss": 46.0, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.11184322829538083, |
|
"grad_norm": 0.0006321795517578721, |
|
"learning_rate": 0.0001659970970343502, |
|
"loss": 46.0, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.11197978901369021, |
|
"grad_norm": 0.0007353540859185159, |
|
"learning_rate": 0.00016591812247219377, |
|
"loss": 46.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.11211634973199959, |
|
"grad_norm": 0.0012658998603001237, |
|
"learning_rate": 0.00016583907513839817, |
|
"loss": 46.0, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.11225291045030897, |
|
"grad_norm": 0.0012359356041997671, |
|
"learning_rate": 0.00016575995512022921, |
|
"loss": 46.0, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.11238947116861835, |
|
"grad_norm": 0.00024977774592116475, |
|
"learning_rate": 0.00016568076250503304, |
|
"loss": 46.0, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.11252603188692772, |
|
"grad_norm": 0.0007921140058897436, |
|
"learning_rate": 0.000165601497380236, |
|
"loss": 46.0, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.1126625926052371, |
|
"grad_norm": 0.00028611160814762115, |
|
"learning_rate": 0.00016552215983334437, |
|
"loss": 46.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.11279915332354648, |
|
"grad_norm": 0.00045781530207023025, |
|
"learning_rate": 0.00016544274995194448, |
|
"loss": 46.0, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.11293571404185586, |
|
"grad_norm": 0.0010054416488856077, |
|
"learning_rate": 0.0001653632678237024, |
|
"loss": 46.0, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.11307227476016524, |
|
"grad_norm": 0.0003318900417070836, |
|
"learning_rate": 0.00016528371353636406, |
|
"loss": 46.0, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.11320883547847461, |
|
"grad_norm": 0.0006165113882161677, |
|
"learning_rate": 0.00016520408717775507, |
|
"loss": 46.0, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.11334539619678399, |
|
"grad_norm": 0.0005957252578809857, |
|
"learning_rate": 0.00016512438883578044, |
|
"loss": 46.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.11348195691509337, |
|
"grad_norm": 0.0002892489719670266, |
|
"learning_rate": 0.00016504461859842486, |
|
"loss": 46.0, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.11361851763340275, |
|
"grad_norm": 0.00041660640272311866, |
|
"learning_rate": 0.00016496477655375227, |
|
"loss": 46.0, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.11375507835171213, |
|
"grad_norm": 0.00023521836556028575, |
|
"learning_rate": 0.00016488486278990586, |
|
"loss": 46.0, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.1138916390700215, |
|
"grad_norm": 0.0009512827964499593, |
|
"learning_rate": 0.00016480487739510807, |
|
"loss": 46.0, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.11402819978833088, |
|
"grad_norm": 0.0008759453776292503, |
|
"learning_rate": 0.00016472482045766043, |
|
"loss": 46.0, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.11416476050664026, |
|
"grad_norm": 0.000294568162644282, |
|
"learning_rate": 0.00016464469206594332, |
|
"loss": 46.0, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.11430132122494964, |
|
"grad_norm": 0.0007791619864292443, |
|
"learning_rate": 0.00016456449230841617, |
|
"loss": 46.0, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.11443788194325902, |
|
"grad_norm": 0.0015861823922023177, |
|
"learning_rate": 0.00016448422127361706, |
|
"loss": 46.0, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.1145744426615684, |
|
"grad_norm": 0.0011900209356099367, |
|
"learning_rate": 0.00016440387905016285, |
|
"loss": 46.0, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.11471100337987777, |
|
"grad_norm": 0.0017578421393409371, |
|
"learning_rate": 0.00016432346572674896, |
|
"loss": 46.0, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.11484756409818715, |
|
"grad_norm": 0.0018410708289593458, |
|
"learning_rate": 0.00016424298139214929, |
|
"loss": 46.0, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.11498412481649653, |
|
"grad_norm": 0.00044194411020725965, |
|
"learning_rate": 0.0001641624261352161, |
|
"loss": 46.0, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.11512068553480591, |
|
"grad_norm": 0.0007466517854481936, |
|
"learning_rate": 0.00016408180004488007, |
|
"loss": 46.0, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.11525724625311529, |
|
"grad_norm": 0.0008741529891267419, |
|
"learning_rate": 0.00016400110321014992, |
|
"loss": 46.0, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.11539380697142466, |
|
"grad_norm": 0.0008493126952089369, |
|
"learning_rate": 0.00016392033572011261, |
|
"loss": 46.0, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.11553036768973404, |
|
"grad_norm": 0.0007634422508999705, |
|
"learning_rate": 0.000163839497663933, |
|
"loss": 46.0, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.11566692840804342, |
|
"grad_norm": 0.0005714827566407621, |
|
"learning_rate": 0.0001637585891308539, |
|
"loss": 46.0, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.1158034891263528, |
|
"grad_norm": 0.0008292211568914354, |
|
"learning_rate": 0.0001636776102101959, |
|
"loss": 46.0, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.11594004984466218, |
|
"grad_norm": 0.00044870973215438426, |
|
"learning_rate": 0.00016359656099135733, |
|
"loss": 46.0, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.11607661056297155, |
|
"grad_norm": 0.0012383662397041917, |
|
"learning_rate": 0.00016351544156381414, |
|
"loss": 46.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.11621317128128093, |
|
"grad_norm": 0.0004602587141562253, |
|
"learning_rate": 0.00016343425201711966, |
|
"loss": 46.0, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.11634973199959031, |
|
"grad_norm": 0.0005955328815616667, |
|
"learning_rate": 0.00016335299244090478, |
|
"loss": 46.0, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.11648629271789969, |
|
"grad_norm": 0.0005287445383146405, |
|
"learning_rate": 0.0001632716629248777, |
|
"loss": 46.0, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.11662285343620908, |
|
"grad_norm": 0.0004926318651996553, |
|
"learning_rate": 0.0001631902635588237, |
|
"loss": 46.0, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.11675941415451846, |
|
"grad_norm": 0.0010656327940523624, |
|
"learning_rate": 0.00016310879443260528, |
|
"loss": 46.0, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.11689597487282784, |
|
"grad_norm": 0.0005354030872695148, |
|
"learning_rate": 0.00016302725563616192, |
|
"loss": 46.0, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.11703253559113722, |
|
"grad_norm": 0.0005059898248873651, |
|
"learning_rate": 0.00016294564725951002, |
|
"loss": 46.0, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.1171690963094466, |
|
"grad_norm": 0.0004201128613203764, |
|
"learning_rate": 0.0001628639693927428, |
|
"loss": 46.0, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.11730565702775597, |
|
"grad_norm": 0.0003003796737175435, |
|
"learning_rate": 0.00016278222212603018, |
|
"loss": 46.0, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.11744221774606535, |
|
"grad_norm": 0.0005390554433688521, |
|
"learning_rate": 0.00016270040554961868, |
|
"loss": 46.0, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.11757877846437473, |
|
"grad_norm": 0.0015715021872892976, |
|
"learning_rate": 0.00016261851975383137, |
|
"loss": 46.0, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.1177153391826841, |
|
"grad_norm": 0.0010319905122742057, |
|
"learning_rate": 0.00016253656482906776, |
|
"loss": 46.0, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.11785189990099348, |
|
"grad_norm": 0.000347215129295364, |
|
"learning_rate": 0.0001624545408658036, |
|
"loss": 46.0, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.11798846061930286, |
|
"grad_norm": 0.0006791274063289165, |
|
"learning_rate": 0.00016237244795459086, |
|
"loss": 46.0, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.11812502133761224, |
|
"grad_norm": 0.00031977854087017477, |
|
"learning_rate": 0.00016229028618605775, |
|
"loss": 46.0, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.11826158205592162, |
|
"grad_norm": 0.0037054885178804398, |
|
"learning_rate": 0.00016220805565090836, |
|
"loss": 46.0, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.118398142774231, |
|
"grad_norm": 0.0005012017791159451, |
|
"learning_rate": 0.00016212575643992277, |
|
"loss": 46.0, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.11853470349254037, |
|
"grad_norm": 0.00028602650854736567, |
|
"learning_rate": 0.00016204338864395684, |
|
"loss": 46.0, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.11867126421084975, |
|
"grad_norm": 0.0006842486909590662, |
|
"learning_rate": 0.00016196095235394207, |
|
"loss": 46.0, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.11880782492915913, |
|
"grad_norm": 0.000561655790079385, |
|
"learning_rate": 0.00016187844766088586, |
|
"loss": 46.0, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.11894438564746851, |
|
"grad_norm": 0.002418296178802848, |
|
"learning_rate": 0.00016179587465587077, |
|
"loss": 46.0, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.11908094636577789, |
|
"grad_norm": 0.00027192741981707513, |
|
"learning_rate": 0.00016171323343005498, |
|
"loss": 46.0, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.11921750708408727, |
|
"grad_norm": 0.0004957255441695452, |
|
"learning_rate": 0.0001616305240746719, |
|
"loss": 46.0, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.11935406780239664, |
|
"grad_norm": 0.00048530122148804367, |
|
"learning_rate": 0.00016154774668103027, |
|
"loss": 46.0, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.11949062852070602, |
|
"grad_norm": 0.0009217667393386364, |
|
"learning_rate": 0.0001614649013405138, |
|
"loss": 46.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.1196271892390154, |
|
"grad_norm": 0.0015625512460246682, |
|
"learning_rate": 0.0001613819881445813, |
|
"loss": 46.0, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.11976374995732478, |
|
"grad_norm": 0.0009254494798369706, |
|
"learning_rate": 0.00016129900718476637, |
|
"loss": 46.0, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.11990031067563416, |
|
"grad_norm": 0.00046171454596333206, |
|
"learning_rate": 0.00016121595855267767, |
|
"loss": 46.0, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.12003687139394353, |
|
"grad_norm": 0.0003148307732772082, |
|
"learning_rate": 0.0001611328423399983, |
|
"loss": 46.0, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.12017343211225291, |
|
"grad_norm": 0.0004730523796752095, |
|
"learning_rate": 0.00016104965863848617, |
|
"loss": 46.0, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.12030999283056229, |
|
"grad_norm": 0.001140785519964993, |
|
"learning_rate": 0.00016096640753997346, |
|
"loss": 46.0, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.12044655354887167, |
|
"grad_norm": 0.0007769656367599964, |
|
"learning_rate": 0.00016088308913636703, |
|
"loss": 46.0, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.12058311426718105, |
|
"grad_norm": 0.000732703018002212, |
|
"learning_rate": 0.00016079970351964783, |
|
"loss": 46.0, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.12071967498549042, |
|
"grad_norm": 0.0007818607264198363, |
|
"learning_rate": 0.00016071625078187114, |
|
"loss": 46.0, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.1208562357037998, |
|
"grad_norm": 0.0006149871041998267, |
|
"learning_rate": 0.00016063273101516625, |
|
"loss": 46.0, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.12099279642210918, |
|
"grad_norm": 0.0008054501377046108, |
|
"learning_rate": 0.00016054914431173654, |
|
"loss": 46.0, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.12112935714041856, |
|
"grad_norm": 0.0008161257137544453, |
|
"learning_rate": 0.0001604654907638592, |
|
"loss": 46.0, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.12126591785872794, |
|
"grad_norm": 0.0030461640562862158, |
|
"learning_rate": 0.00016038177046388523, |
|
"loss": 46.0, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.12140247857703731, |
|
"grad_norm": 0.0007848728564567864, |
|
"learning_rate": 0.0001602979835042394, |
|
"loss": 46.0, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.12153903929534669, |
|
"grad_norm": 0.005675900261849165, |
|
"learning_rate": 0.00016021412997741993, |
|
"loss": 46.0, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.12167560001365607, |
|
"grad_norm": 0.00039349167491309345, |
|
"learning_rate": 0.0001601302099759987, |
|
"loss": 46.0, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.12181216073196545, |
|
"grad_norm": 0.0006075625424273312, |
|
"learning_rate": 0.00016004622359262085, |
|
"loss": 46.0, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.12194872145027483, |
|
"grad_norm": 0.0003772106138058007, |
|
"learning_rate": 0.0001599621709200048, |
|
"loss": 46.0, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.1220852821685842, |
|
"grad_norm": 0.0015227465191856027, |
|
"learning_rate": 0.00015987805205094227, |
|
"loss": 46.0, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.12222184288689358, |
|
"grad_norm": 0.00073139468440786, |
|
"learning_rate": 0.00015979386707829792, |
|
"loss": 46.0, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.12235840360520296, |
|
"grad_norm": 0.0011622250312939286, |
|
"learning_rate": 0.00015970961609500944, |
|
"loss": 46.0, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.12249496432351234, |
|
"grad_norm": 0.001449939445592463, |
|
"learning_rate": 0.00015962529919408746, |
|
"loss": 46.0, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.12263152504182172, |
|
"grad_norm": 0.0005385968834161758, |
|
"learning_rate": 0.00015954091646861525, |
|
"loss": 46.0, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.1227680857601311, |
|
"grad_norm": 0.0007052323780953884, |
|
"learning_rate": 0.00015945646801174886, |
|
"loss": 46.0, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.12290464647844047, |
|
"grad_norm": 0.001713512814603746, |
|
"learning_rate": 0.0001593719539167169, |
|
"loss": 46.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.12290464647844047, |
|
"eval_loss": 11.5, |
|
"eval_runtime": 20.6643, |
|
"eval_samples_per_second": 149.243, |
|
"eval_steps_per_second": 74.621, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.12304120719674985, |
|
"grad_norm": 0.0009241271764039993, |
|
"learning_rate": 0.00015928737427682032, |
|
"loss": 46.0, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.12317776791505923, |
|
"grad_norm": 0.0003521353646647185, |
|
"learning_rate": 0.00015920272918543257, |
|
"loss": 46.0, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.12331432863336861, |
|
"grad_norm": 0.0004476907488424331, |
|
"learning_rate": 0.00015911801873599933, |
|
"loss": 46.0, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.12345088935167799, |
|
"grad_norm": 0.0008625802001915872, |
|
"learning_rate": 0.00015903324302203836, |
|
"loss": 46.0, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.12358745006998736, |
|
"grad_norm": 0.000347345310728997, |
|
"learning_rate": 0.00015894840213713952, |
|
"loss": 46.0, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.12372401078829674, |
|
"grad_norm": 0.00042961168219335377, |
|
"learning_rate": 0.0001588634961749646, |
|
"loss": 46.0, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.12386057150660612, |
|
"grad_norm": 0.0005479567334987223, |
|
"learning_rate": 0.00015877852522924732, |
|
"loss": 46.0, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.1239971322249155, |
|
"grad_norm": 0.000546832219697535, |
|
"learning_rate": 0.00015869348939379302, |
|
"loss": 46.0, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.12413369294322488, |
|
"grad_norm": 0.0005068538011983037, |
|
"learning_rate": 0.0001586083887624787, |
|
"loss": 46.0, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.12427025366153426, |
|
"grad_norm": 0.0005834728945046663, |
|
"learning_rate": 0.00015852322342925295, |
|
"loss": 46.0, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.12440681437984363, |
|
"grad_norm": 0.0004933670861646533, |
|
"learning_rate": 0.00015843799348813574, |
|
"loss": 46.0, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.12454337509815301, |
|
"grad_norm": 0.0011595729738473892, |
|
"learning_rate": 0.0001583526990332184, |
|
"loss": 46.0, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.12467993581646239, |
|
"grad_norm": 0.00029905018163844943, |
|
"learning_rate": 0.00015826734015866344, |
|
"loss": 46.0, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.12481649653477177, |
|
"grad_norm": 0.000434244517236948, |
|
"learning_rate": 0.00015818191695870452, |
|
"loss": 46.0, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.12495305725308115, |
|
"grad_norm": 0.0006040096050128341, |
|
"learning_rate": 0.00015809642952764632, |
|
"loss": 46.0, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.12508961797139054, |
|
"grad_norm": 0.0007016469608061016, |
|
"learning_rate": 0.00015801087795986438, |
|
"loss": 46.0, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.1252261786896999, |
|
"grad_norm": 0.0003513791016303003, |
|
"learning_rate": 0.0001579252623498051, |
|
"loss": 46.0, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.1253627394080093, |
|
"grad_norm": 0.0003278540389146656, |
|
"learning_rate": 0.0001578395827919855, |
|
"loss": 46.0, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.12549930012631866, |
|
"grad_norm": 0.000646027154289186, |
|
"learning_rate": 0.00015775383938099332, |
|
"loss": 46.0, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.12563586084462805, |
|
"grad_norm": 0.0005723676295019686, |
|
"learning_rate": 0.00015766803221148673, |
|
"loss": 46.0, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.12577242156293741, |
|
"grad_norm": 0.0015359300887212157, |
|
"learning_rate": 0.00015758216137819422, |
|
"loss": 46.0, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.1259089822812468, |
|
"grad_norm": 0.00043970157275907695, |
|
"learning_rate": 0.0001574962269759147, |
|
"loss": 46.0, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.12604554299955617, |
|
"grad_norm": 0.00028622214449569583, |
|
"learning_rate": 0.00015741022909951716, |
|
"loss": 46.0, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.12618210371786556, |
|
"grad_norm": 0.00038283158210106194, |
|
"learning_rate": 0.00015732416784394065, |
|
"loss": 46.0, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.12631866443617493, |
|
"grad_norm": 0.00039500248385593295, |
|
"learning_rate": 0.00015723804330419422, |
|
"loss": 46.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.12645522515448432, |
|
"grad_norm": 0.0004930765135213733, |
|
"learning_rate": 0.00015715185557535689, |
|
"loss": 46.0, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.12659178587279368, |
|
"grad_norm": 0.0005405626725405455, |
|
"learning_rate": 0.00015706560475257727, |
|
"loss": 46.0, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.12672834659110307, |
|
"grad_norm": 0.00043443331378512084, |
|
"learning_rate": 0.00015697929093107365, |
|
"loss": 46.0, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.12686490730941244, |
|
"grad_norm": 0.0005110527272336185, |
|
"learning_rate": 0.000156892914206134, |
|
"loss": 46.0, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.12700146802772183, |
|
"grad_norm": 0.00031187915010377765, |
|
"learning_rate": 0.00015680647467311557, |
|
"loss": 46.0, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.1271380287460312, |
|
"grad_norm": 0.0001981136156246066, |
|
"learning_rate": 0.00015671997242744511, |
|
"loss": 46.0, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.1272745894643406, |
|
"grad_norm": 0.0026429896242916584, |
|
"learning_rate": 0.00015663340756461844, |
|
"loss": 46.0, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.12741115018264995, |
|
"grad_norm": 0.0008942155982367694, |
|
"learning_rate": 0.0001565467801802006, |
|
"loss": 46.0, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.12754771090095934, |
|
"grad_norm": 0.0006840588175691664, |
|
"learning_rate": 0.00015646009036982567, |
|
"loss": 46.0, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.1276842716192687, |
|
"grad_norm": 0.0009611474233679473, |
|
"learning_rate": 0.00015637333822919656, |
|
"loss": 46.0, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.1278208323375781, |
|
"grad_norm": 0.000530791119672358, |
|
"learning_rate": 0.00015628652385408508, |
|
"loss": 46.0, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.12795739305588746, |
|
"grad_norm": 0.00041494445758871734, |
|
"learning_rate": 0.00015619964734033172, |
|
"loss": 46.0, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.12809395377419686, |
|
"grad_norm": 0.0008938443497754633, |
|
"learning_rate": 0.00015611270878384552, |
|
"loss": 46.0, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.12823051449250622, |
|
"grad_norm": 0.000739588460419327, |
|
"learning_rate": 0.00015602570828060407, |
|
"loss": 46.0, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.1283670752108156, |
|
"grad_norm": 0.0025961471255868673, |
|
"learning_rate": 0.00015593864592665333, |
|
"loss": 46.0, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.12850363592912498, |
|
"grad_norm": 0.00049801473505795, |
|
"learning_rate": 0.00015585152181810753, |
|
"loss": 46.0, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.12864019664743437, |
|
"grad_norm": 0.000621246756054461, |
|
"learning_rate": 0.00015576433605114912, |
|
"loss": 46.0, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.12877675736574373, |
|
"grad_norm": 0.0007328742649406195, |
|
"learning_rate": 0.00015567708872202854, |
|
"loss": 46.0, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.12891331808405312, |
|
"grad_norm": 0.005625641904771328, |
|
"learning_rate": 0.00015558977992706426, |
|
"loss": 46.0, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.1290498788023625, |
|
"grad_norm": 0.0005238280282355845, |
|
"learning_rate": 0.00015550240976264253, |
|
"loss": 46.0, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.12918643952067188, |
|
"grad_norm": 0.001820914214476943, |
|
"learning_rate": 0.0001554149783252175, |
|
"loss": 46.0, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.12932300023898124, |
|
"grad_norm": 0.0006223174277693033, |
|
"learning_rate": 0.0001553274857113108, |
|
"loss": 46.0, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.12945956095729064, |
|
"grad_norm": 0.0015029089991003275, |
|
"learning_rate": 0.00015523993201751167, |
|
"loss": 46.0, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.1295961216756, |
|
"grad_norm": 0.0012488181237131357, |
|
"learning_rate": 0.00015515231734047677, |
|
"loss": 46.0, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.1297326823939094, |
|
"grad_norm": 0.0007277615368366241, |
|
"learning_rate": 0.0001550646417769301, |
|
"loss": 46.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.12986924311221876, |
|
"grad_norm": 0.0024758039508014917, |
|
"learning_rate": 0.0001549769054236629, |
|
"loss": 46.0, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.13000580383052815, |
|
"grad_norm": 0.00043562057544477284, |
|
"learning_rate": 0.00015488910837753342, |
|
"loss": 46.0, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.1301423645488375, |
|
"grad_norm": 0.0006010486977174878, |
|
"learning_rate": 0.00015480125073546704, |
|
"loss": 46.0, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.1302789252671469, |
|
"grad_norm": 0.00039981160080060363, |
|
"learning_rate": 0.0001547133325944559, |
|
"loss": 46.0, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.13041548598545627, |
|
"grad_norm": 0.00056524045066908, |
|
"learning_rate": 0.00015462535405155902, |
|
"loss": 46.0, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.13055204670376566, |
|
"grad_norm": 0.00019664541468955576, |
|
"learning_rate": 0.00015453731520390215, |
|
"loss": 46.0, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.13068860742207505, |
|
"grad_norm": 0.000747493002563715, |
|
"learning_rate": 0.0001544492161486775, |
|
"loss": 46.0, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.13082516814038442, |
|
"grad_norm": 0.00021618347091134638, |
|
"learning_rate": 0.00015436105698314384, |
|
"loss": 46.0, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.1309617288586938, |
|
"grad_norm": 0.0005658991285599768, |
|
"learning_rate": 0.0001542728378046262, |
|
"loss": 46.0, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.13109828957700317, |
|
"grad_norm": 0.0010320099536329508, |
|
"learning_rate": 0.00015418455871051592, |
|
"loss": 46.0, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.13123485029531257, |
|
"grad_norm": 0.00037752182106487453, |
|
"learning_rate": 0.00015409621979827048, |
|
"loss": 46.0, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.13137141101362193, |
|
"grad_norm": 0.0003615982714109123, |
|
"learning_rate": 0.0001540078211654135, |
|
"loss": 46.0, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.13150797173193132, |
|
"grad_norm": 0.00041104850242845714, |
|
"learning_rate": 0.0001539193629095343, |
|
"loss": 46.0, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.1316445324502407, |
|
"grad_norm": 0.0005330987041816115, |
|
"learning_rate": 0.00015383084512828824, |
|
"loss": 46.0, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.13178109316855008, |
|
"grad_norm": 0.0003800500126089901, |
|
"learning_rate": 0.00015374226791939628, |
|
"loss": 46.0, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.13191765388685944, |
|
"grad_norm": 0.0006545698852278292, |
|
"learning_rate": 0.000153653631380645, |
|
"loss": 46.0, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.13205421460516883, |
|
"grad_norm": 0.0020852810703217983, |
|
"learning_rate": 0.0001535649356098865, |
|
"loss": 46.0, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.1321907753234782, |
|
"grad_norm": 0.00031325622694566846, |
|
"learning_rate": 0.00015347618070503827, |
|
"loss": 46.0, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.1323273360417876, |
|
"grad_norm": 0.00019234443607274443, |
|
"learning_rate": 0.0001533873667640831, |
|
"loss": 46.0, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.13246389676009696, |
|
"grad_norm": 0.00038069483707658947, |
|
"learning_rate": 0.00015329849388506886, |
|
"loss": 46.0, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.13260045747840635, |
|
"grad_norm": 0.00045511999633163214, |
|
"learning_rate": 0.00015320956216610866, |
|
"loss": 46.0, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.1327370181967157, |
|
"grad_norm": 0.0005886334110982716, |
|
"learning_rate": 0.00015312057170538035, |
|
"loss": 46.0, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.1328735789150251, |
|
"grad_norm": 0.0009905985789373517, |
|
"learning_rate": 0.00015303152260112682, |
|
"loss": 46.0, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.13301013963333447, |
|
"grad_norm": 0.0010108448332175612, |
|
"learning_rate": 0.00015294241495165557, |
|
"loss": 46.0, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.13314670035164386, |
|
"grad_norm": 0.0005389642901718616, |
|
"learning_rate": 0.00015285324885533884, |
|
"loss": 46.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.13328326106995322, |
|
"grad_norm": 0.000410493987146765, |
|
"learning_rate": 0.0001527640244106133, |
|
"loss": 46.0, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.13341982178826262, |
|
"grad_norm": 0.001756677869707346, |
|
"learning_rate": 0.00015267474171598005, |
|
"loss": 46.0, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.13355638250657198, |
|
"grad_norm": 0.0003254815237596631, |
|
"learning_rate": 0.0001525854008700046, |
|
"loss": 46.0, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.13369294322488137, |
|
"grad_norm": 0.0003471802920103073, |
|
"learning_rate": 0.00015249600197131651, |
|
"loss": 46.0, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.13382950394319074, |
|
"grad_norm": 0.0015930512454360723, |
|
"learning_rate": 0.0001524065451186095, |
|
"loss": 46.0, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.13396606466150013, |
|
"grad_norm": 0.00028746266616508365, |
|
"learning_rate": 0.0001523170304106413, |
|
"loss": 46.0, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.1341026253798095, |
|
"grad_norm": 0.0005899532698094845, |
|
"learning_rate": 0.0001522274579462334, |
|
"loss": 46.0, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.13423918609811888, |
|
"grad_norm": 0.0005316737224347889, |
|
"learning_rate": 0.00015213782782427123, |
|
"loss": 46.0, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.13437574681642825, |
|
"grad_norm": 0.00045934764784760773, |
|
"learning_rate": 0.00015204814014370372, |
|
"loss": 46.0, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.13451230753473764, |
|
"grad_norm": 0.001285345759242773, |
|
"learning_rate": 0.00015195839500354335, |
|
"loss": 46.0, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.134648868253047, |
|
"grad_norm": 0.0005873920163139701, |
|
"learning_rate": 0.00015186859250286615, |
|
"loss": 46.0, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.1347854289713564, |
|
"grad_norm": 0.0011875568889081478, |
|
"learning_rate": 0.00015177873274081137, |
|
"loss": 46.0, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.13492198968966576, |
|
"grad_norm": 0.001056193490512669, |
|
"learning_rate": 0.00015168881581658147, |
|
"loss": 46.0, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.13505855040797515, |
|
"grad_norm": 0.0009340514661744237, |
|
"learning_rate": 0.00015159884182944211, |
|
"loss": 46.0, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.13519511112628452, |
|
"grad_norm": 0.0011555576929822564, |
|
"learning_rate": 0.00015150881087872185, |
|
"loss": 46.0, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.1353316718445939, |
|
"grad_norm": 0.001084683695808053, |
|
"learning_rate": 0.00015141872306381215, |
|
"loss": 46.0, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.13546823256290327, |
|
"grad_norm": 0.000519106222782284, |
|
"learning_rate": 0.00015132857848416733, |
|
"loss": 46.0, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.13560479328121267, |
|
"grad_norm": 0.001280359923839569, |
|
"learning_rate": 0.00015123837723930424, |
|
"loss": 46.0, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.13574135399952203, |
|
"grad_norm": 0.0007028987165540457, |
|
"learning_rate": 0.00015114811942880242, |
|
"loss": 46.0, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.13587791471783142, |
|
"grad_norm": 0.0005714019644074142, |
|
"learning_rate": 0.00015105780515230376, |
|
"loss": 46.0, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.13601447543614079, |
|
"grad_norm": 0.0009716853965073824, |
|
"learning_rate": 0.00015096743450951258, |
|
"loss": 46.0, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.13615103615445018, |
|
"grad_norm": 0.0006497084395959973, |
|
"learning_rate": 0.00015087700760019532, |
|
"loss": 46.0, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.13628759687275954, |
|
"grad_norm": 0.0013521420769393444, |
|
"learning_rate": 0.00015078652452418063, |
|
"loss": 46.0, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.13642415759106893, |
|
"grad_norm": 0.0008505037403665483, |
|
"learning_rate": 0.00015069598538135906, |
|
"loss": 46.0, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.1365607183093783, |
|
"grad_norm": 0.0011916455114260316, |
|
"learning_rate": 0.00015060539027168316, |
|
"loss": 46.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1366972790276877, |
|
"grad_norm": 0.0005008620209991932, |
|
"learning_rate": 0.00015051473929516722, |
|
"loss": 46.0, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.13683383974599705, |
|
"grad_norm": 0.0006068138754926622, |
|
"learning_rate": 0.00015042403255188723, |
|
"loss": 46.0, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.13697040046430645, |
|
"grad_norm": 0.0003954765561502427, |
|
"learning_rate": 0.00015033327014198075, |
|
"loss": 46.0, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.1371069611826158, |
|
"grad_norm": 0.0005776135949417949, |
|
"learning_rate": 0.00015024245216564667, |
|
"loss": 46.0, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.1372435219009252, |
|
"grad_norm": 0.0008297267486341298, |
|
"learning_rate": 0.00015015157872314542, |
|
"loss": 46.0, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.13738008261923457, |
|
"grad_norm": 0.000555426231585443, |
|
"learning_rate": 0.00015006064991479853, |
|
"loss": 46.0, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.13751664333754396, |
|
"grad_norm": 0.0002457842347212136, |
|
"learning_rate": 0.0001499696658409887, |
|
"loss": 46.0, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.13765320405585332, |
|
"grad_norm": 0.0005408066790550947, |
|
"learning_rate": 0.00014987862660215966, |
|
"loss": 46.0, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.13778976477416272, |
|
"grad_norm": 0.0010539703071117401, |
|
"learning_rate": 0.00014978753229881594, |
|
"loss": 46.0, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.13792632549247208, |
|
"grad_norm": 0.00045947683975100517, |
|
"learning_rate": 0.00014969638303152295, |
|
"loss": 46.0, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.13806288621078147, |
|
"grad_norm": 0.0004952670424245298, |
|
"learning_rate": 0.0001496051789009068, |
|
"loss": 46.0, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.13819944692909084, |
|
"grad_norm": 0.0006827415782026947, |
|
"learning_rate": 0.00014951392000765411, |
|
"loss": 46.0, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.13833600764740023, |
|
"grad_norm": 0.000486463715787977, |
|
"learning_rate": 0.000149422606452512, |
|
"loss": 46.0, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.1384725683657096, |
|
"grad_norm": 0.0002832711033988744, |
|
"learning_rate": 0.00014933123833628785, |
|
"loss": 46.0, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.13860912908401898, |
|
"grad_norm": 0.0004940008511766791, |
|
"learning_rate": 0.00014923981575984936, |
|
"loss": 46.0, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.13874568980232835, |
|
"grad_norm": 0.0008655837154947221, |
|
"learning_rate": 0.00014914833882412435, |
|
"loss": 46.0, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.13888225052063774, |
|
"grad_norm": 0.0007375786663033068, |
|
"learning_rate": 0.00014905680763010058, |
|
"loss": 46.0, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.1390188112389471, |
|
"grad_norm": 0.0006843106239102781, |
|
"learning_rate": 0.00014896522227882578, |
|
"loss": 46.0, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.1391553719572565, |
|
"grad_norm": 0.001130104181356728, |
|
"learning_rate": 0.00014887358287140744, |
|
"loss": 46.0, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.13929193267556586, |
|
"grad_norm": 0.0008909485186450183, |
|
"learning_rate": 0.00014878188950901276, |
|
"loss": 46.0, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.13942849339387525, |
|
"grad_norm": 0.005642786156386137, |
|
"learning_rate": 0.0001486901422928684, |
|
"loss": 46.0, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.13956505411218462, |
|
"grad_norm": 0.0005347135593183339, |
|
"learning_rate": 0.0001485983413242606, |
|
"loss": 46.0, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.139701614830494, |
|
"grad_norm": 0.000356485164957121, |
|
"learning_rate": 0.00014850648670453493, |
|
"loss": 46.0, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.13983817554880337, |
|
"grad_norm": 0.0006373700452968478, |
|
"learning_rate": 0.00014841457853509606, |
|
"loss": 46.0, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.13997473626711276, |
|
"grad_norm": 0.00020091190526727587, |
|
"learning_rate": 0.0001483226169174079, |
|
"loss": 46.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.14011129698542216, |
|
"grad_norm": 0.00042303564259782434, |
|
"learning_rate": 0.00014823060195299337, |
|
"loss": 46.0, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.14024785770373152, |
|
"grad_norm": 0.0004543966497294605, |
|
"learning_rate": 0.00014813853374343419, |
|
"loss": 46.0, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.1403844184220409, |
|
"grad_norm": 0.00042528280755504966, |
|
"learning_rate": 0.00014804641239037097, |
|
"loss": 46.0, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.14052097914035028, |
|
"grad_norm": 0.0005864663980901241, |
|
"learning_rate": 0.00014795423799550284, |
|
"loss": 46.0, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.14065753985865967, |
|
"grad_norm": 0.0005385400145314634, |
|
"learning_rate": 0.00014786201066058766, |
|
"loss": 46.0, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.14079410057696903, |
|
"grad_norm": 0.0011653905967250466, |
|
"learning_rate": 0.00014776973048744165, |
|
"loss": 46.0, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.14093066129527843, |
|
"grad_norm": 0.0004561395035125315, |
|
"learning_rate": 0.0001476773975779393, |
|
"loss": 46.0, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.1410672220135878, |
|
"grad_norm": 0.000643297506030649, |
|
"learning_rate": 0.00014758501203401348, |
|
"loss": 46.0, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.14120378273189718, |
|
"grad_norm": 0.0006550021353177726, |
|
"learning_rate": 0.00014749257395765502, |
|
"loss": 46.0, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.14134034345020655, |
|
"grad_norm": 0.0007623559795320034, |
|
"learning_rate": 0.0001474000834509128, |
|
"loss": 46.0, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.14147690416851594, |
|
"grad_norm": 0.000623580242972821, |
|
"learning_rate": 0.00014730754061589355, |
|
"loss": 46.0, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.1416134648868253, |
|
"grad_norm": 0.0010572531027719378, |
|
"learning_rate": 0.00014721494555476188, |
|
"loss": 46.0, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.1417500256051347, |
|
"grad_norm": 0.0011201991001144052, |
|
"learning_rate": 0.00014712229836973988, |
|
"loss": 46.0, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.14188658632344406, |
|
"grad_norm": 0.001960804220288992, |
|
"learning_rate": 0.00014702959916310736, |
|
"loss": 46.0, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.14202314704175345, |
|
"grad_norm": 0.0050703976303339005, |
|
"learning_rate": 0.00014693684803720138, |
|
"loss": 46.0, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.14215970776006281, |
|
"grad_norm": 0.00040476518915966153, |
|
"learning_rate": 0.0001468440450944165, |
|
"loss": 46.0, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.1422962684783722, |
|
"grad_norm": 0.0007858510361984372, |
|
"learning_rate": 0.00014675119043720437, |
|
"loss": 46.0, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.14243282919668157, |
|
"grad_norm": 0.0007758094579912722, |
|
"learning_rate": 0.0001466582841680737, |
|
"loss": 46.0, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.14256938991499096, |
|
"grad_norm": 0.0008653284166939557, |
|
"learning_rate": 0.00014656532638959035, |
|
"loss": 46.0, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.14270595063330033, |
|
"grad_norm": 0.0004421341873239726, |
|
"learning_rate": 0.00014647231720437686, |
|
"loss": 46.0, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.14284251135160972, |
|
"grad_norm": 0.0008486118167638779, |
|
"learning_rate": 0.0001463792567151126, |
|
"loss": 46.0, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.14297907206991908, |
|
"grad_norm": 0.000525649928022176, |
|
"learning_rate": 0.0001462861450245336, |
|
"loss": 46.0, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.14311563278822848, |
|
"grad_norm": 0.0017683632904663682, |
|
"learning_rate": 0.00014619298223543235, |
|
"loss": 46.0, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.14325219350653784, |
|
"grad_norm": 0.0012217290932312608, |
|
"learning_rate": 0.00014609976845065783, |
|
"loss": 46.0, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.14338875422484723, |
|
"grad_norm": 0.0011137262918055058, |
|
"learning_rate": 0.00014600650377311522, |
|
"loss": 46.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.1435253149431566, |
|
"grad_norm": 0.0003707687428686768, |
|
"learning_rate": 0.00014591318830576598, |
|
"loss": 46.0, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.143661875661466, |
|
"grad_norm": 0.000414914742577821, |
|
"learning_rate": 0.0001458198221516276, |
|
"loss": 46.0, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.14379843637977535, |
|
"grad_norm": 0.0008973225485533476, |
|
"learning_rate": 0.0001457264054137735, |
|
"loss": 46.0, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.14393499709808474, |
|
"grad_norm": 0.00040008637006394565, |
|
"learning_rate": 0.000145632938195333, |
|
"loss": 46.0, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.1440715578163941, |
|
"grad_norm": 0.0005400644731707871, |
|
"learning_rate": 0.0001455394205994911, |
|
"loss": 46.0, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.1442081185347035, |
|
"grad_norm": 0.00028061779448762536, |
|
"learning_rate": 0.00014544585272948843, |
|
"loss": 46.0, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.14434467925301286, |
|
"grad_norm": 0.0007635858491994441, |
|
"learning_rate": 0.00014535223468862114, |
|
"loss": 46.0, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.14448123997132226, |
|
"grad_norm": 0.0004812279948964715, |
|
"learning_rate": 0.00014525856658024076, |
|
"loss": 46.0, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.14461780068963162, |
|
"grad_norm": 0.0004690811620093882, |
|
"learning_rate": 0.00014516484850775406, |
|
"loss": 46.0, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.144754361407941, |
|
"grad_norm": 0.0008514091605320573, |
|
"learning_rate": 0.00014507108057462296, |
|
"loss": 46.0, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.14489092212625038, |
|
"grad_norm": 0.00031304339063353837, |
|
"learning_rate": 0.00014497726288436458, |
|
"loss": 46.0, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.14502748284455977, |
|
"grad_norm": 0.00015942241589073092, |
|
"learning_rate": 0.00014488339554055073, |
|
"loss": 46.0, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.14516404356286913, |
|
"grad_norm": 0.0012250308645889163, |
|
"learning_rate": 0.0001447894786468082, |
|
"loss": 46.0, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.14530060428117852, |
|
"grad_norm": 0.00036729895509779453, |
|
"learning_rate": 0.00014469551230681844, |
|
"loss": 46.0, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.1454371649994879, |
|
"grad_norm": 0.000257661915384233, |
|
"learning_rate": 0.00014460149662431747, |
|
"loss": 46.0, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.14557372571779728, |
|
"grad_norm": 0.000458430964499712, |
|
"learning_rate": 0.00014450743170309584, |
|
"loss": 46.0, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.14571028643610665, |
|
"grad_norm": 0.0004429294203873724, |
|
"learning_rate": 0.00014441331764699836, |
|
"loss": 46.0, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.14584684715441604, |
|
"grad_norm": 0.0005923425196669996, |
|
"learning_rate": 0.00014431915455992414, |
|
"loss": 46.0, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.1459834078727254, |
|
"grad_norm": 0.0003316183283459395, |
|
"learning_rate": 0.00014422494254582647, |
|
"loss": 46.0, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.1461199685910348, |
|
"grad_norm": 0.0004024969239253551, |
|
"learning_rate": 0.0001441306817087125, |
|
"loss": 46.0, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.14625652930934416, |
|
"grad_norm": 0.00029238680144771934, |
|
"learning_rate": 0.00014403637215264353, |
|
"loss": 46.0, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.14639309002765355, |
|
"grad_norm": 0.00044409476686269045, |
|
"learning_rate": 0.00014394201398173437, |
|
"loss": 46.0, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.1465296507459629, |
|
"grad_norm": 0.000767083081882447, |
|
"learning_rate": 0.00014384760730015364, |
|
"loss": 46.0, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.1466662114642723, |
|
"grad_norm": 0.00029706236091442406, |
|
"learning_rate": 0.00014375315221212357, |
|
"loss": 46.0, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.14680277218258167, |
|
"grad_norm": 0.0004188843595329672, |
|
"learning_rate": 0.00014365864882191968, |
|
"loss": 46.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.14693933290089106, |
|
"grad_norm": 0.0005889105377718806, |
|
"learning_rate": 0.0001435640972338709, |
|
"loss": 46.0, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.14707589361920043, |
|
"grad_norm": 0.00072791165439412, |
|
"learning_rate": 0.00014346949755235944, |
|
"loss": 46.0, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.14721245433750982, |
|
"grad_norm": 0.0006283425609581172, |
|
"learning_rate": 0.00014337484988182042, |
|
"loss": 46.0, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.14734901505581918, |
|
"grad_norm": 0.000486049015307799, |
|
"learning_rate": 0.00014328015432674214, |
|
"loss": 46.0, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.14748557577412857, |
|
"grad_norm": 0.0007576828938908875, |
|
"learning_rate": 0.00014318541099166555, |
|
"loss": 46.0, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.14762213649243794, |
|
"grad_norm": 0.0010862492490559816, |
|
"learning_rate": 0.00014309061998118454, |
|
"loss": 46.0, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.14775869721074733, |
|
"grad_norm": 0.0005167250637896359, |
|
"learning_rate": 0.00014299578139994557, |
|
"loss": 46.0, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.1478952579290567, |
|
"grad_norm": 0.002369736786931753, |
|
"learning_rate": 0.00014290089535264755, |
|
"loss": 46.0, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.1480318186473661, |
|
"grad_norm": 0.0003688117431011051, |
|
"learning_rate": 0.0001428059619440419, |
|
"loss": 46.0, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.14816837936567545, |
|
"grad_norm": 0.0003458712890278548, |
|
"learning_rate": 0.00014271098127893218, |
|
"loss": 46.0, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.14830494008398484, |
|
"grad_norm": 0.0013860361650586128, |
|
"learning_rate": 0.0001426159534621743, |
|
"loss": 46.0, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.1484415008022942, |
|
"grad_norm": 0.001035936875268817, |
|
"learning_rate": 0.00014252087859867608, |
|
"loss": 46.0, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.1485780615206036, |
|
"grad_norm": 0.00632870476692915, |
|
"learning_rate": 0.00014242575679339738, |
|
"loss": 46.0, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.14871462223891296, |
|
"grad_norm": 0.0006592991994693875, |
|
"learning_rate": 0.00014233058815134978, |
|
"loss": 46.0, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.14885118295722236, |
|
"grad_norm": 0.0028478745371103287, |
|
"learning_rate": 0.00014223537277759666, |
|
"loss": 46.0, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.14898774367553172, |
|
"grad_norm": 0.0005855032941326499, |
|
"learning_rate": 0.00014214011077725292, |
|
"loss": 46.0, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.1491243043938411, |
|
"grad_norm": 0.0023942850530147552, |
|
"learning_rate": 0.00014204480225548494, |
|
"loss": 46.0, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.1492608651121505, |
|
"grad_norm": 0.0013539772480726242, |
|
"learning_rate": 0.00014194944731751058, |
|
"loss": 46.0, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.14939742583045987, |
|
"grad_norm": 0.0009589577093720436, |
|
"learning_rate": 0.00014185404606859877, |
|
"loss": 46.0, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.14953398654876926, |
|
"grad_norm": 0.0003180347557645291, |
|
"learning_rate": 0.00014175859861406966, |
|
"loss": 46.0, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.14967054726707862, |
|
"grad_norm": 0.0014342650538310409, |
|
"learning_rate": 0.00014166310505929434, |
|
"loss": 46.0, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.14980710798538802, |
|
"grad_norm": 0.0006643772940151393, |
|
"learning_rate": 0.00014156756550969492, |
|
"loss": 46.0, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.14994366870369738, |
|
"grad_norm": 0.001409722724929452, |
|
"learning_rate": 0.00014147198007074415, |
|
"loss": 46.0, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.15008022942200677, |
|
"grad_norm": 0.0008714981959201396, |
|
"learning_rate": 0.00014137634884796557, |
|
"loss": 46.0, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.15021679014031614, |
|
"grad_norm": 0.00160513399168849, |
|
"learning_rate": 0.00014128067194693316, |
|
"loss": 46.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.15035335085862553, |
|
"grad_norm": 0.0004049557028338313, |
|
"learning_rate": 0.0001411849494732713, |
|
"loss": 46.0, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.1504899115769349, |
|
"grad_norm": 0.0005186764756217599, |
|
"learning_rate": 0.00014108918153265485, |
|
"loss": 46.0, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.15062647229524428, |
|
"grad_norm": 0.002497048582881689, |
|
"learning_rate": 0.00014099336823080865, |
|
"loss": 46.0, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.15076303301355365, |
|
"grad_norm": 0.00020665116608142853, |
|
"learning_rate": 0.00014089750967350781, |
|
"loss": 46.0, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.15089959373186304, |
|
"grad_norm": 0.0005294004804454744, |
|
"learning_rate": 0.0001408016059665773, |
|
"loss": 46.0, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.1510361544501724, |
|
"grad_norm": 0.00033093662932515144, |
|
"learning_rate": 0.00014070565721589195, |
|
"loss": 46.0, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.1511727151684818, |
|
"grad_norm": 0.0003122551424894482, |
|
"learning_rate": 0.00014060966352737628, |
|
"loss": 46.0, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.15130927588679116, |
|
"grad_norm": 0.0006798842805437744, |
|
"learning_rate": 0.00014051362500700447, |
|
"loss": 46.0, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.15144583660510055, |
|
"grad_norm": 0.0005911933840252459, |
|
"learning_rate": 0.00014041754176080017, |
|
"loss": 46.0, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.15158239732340992, |
|
"grad_norm": 0.0010168857406824827, |
|
"learning_rate": 0.00014032141389483648, |
|
"loss": 46.0, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.1517189580417193, |
|
"grad_norm": 0.00033409081515856087, |
|
"learning_rate": 0.00014022524151523563, |
|
"loss": 46.0, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.15185551876002867, |
|
"grad_norm": 0.0002926045854110271, |
|
"learning_rate": 0.00014012902472816907, |
|
"loss": 46.0, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.15199207947833807, |
|
"grad_norm": 0.0013335029361769557, |
|
"learning_rate": 0.00014003276363985727, |
|
"loss": 46.0, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.15212864019664743, |
|
"grad_norm": 0.000854438403621316, |
|
"learning_rate": 0.00013993645835656953, |
|
"loss": 46.0, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.15226520091495682, |
|
"grad_norm": 0.0009656418114900589, |
|
"learning_rate": 0.00013984010898462416, |
|
"loss": 46.0, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.1524017616332662, |
|
"grad_norm": 0.00042812732863239944, |
|
"learning_rate": 0.00013974371563038785, |
|
"loss": 46.0, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.15253832235157558, |
|
"grad_norm": 0.0006366227171383798, |
|
"learning_rate": 0.00013964727840027604, |
|
"loss": 46.0, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.15267488306988494, |
|
"grad_norm": 0.0011450116289779544, |
|
"learning_rate": 0.00013955079740075256, |
|
"loss": 46.0, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.15281144378819433, |
|
"grad_norm": 0.00023903950932435691, |
|
"learning_rate": 0.00013945427273832954, |
|
"loss": 46.0, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.1529480045065037, |
|
"grad_norm": 0.0011047361185774207, |
|
"learning_rate": 0.0001393577045195673, |
|
"loss": 46.0, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.1530845652248131, |
|
"grad_norm": 0.0004785344353877008, |
|
"learning_rate": 0.0001392610928510743, |
|
"loss": 46.0, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.15322112594312245, |
|
"grad_norm": 0.0008018110529519618, |
|
"learning_rate": 0.00013916443783950694, |
|
"loss": 46.0, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.15335768666143185, |
|
"grad_norm": 0.0012497154530137777, |
|
"learning_rate": 0.00013906773959156948, |
|
"loss": 46.0, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.1534942473797412, |
|
"grad_norm": 0.00042997964192181826, |
|
"learning_rate": 0.00013897099821401384, |
|
"loss": 46.0, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.1536308080980506, |
|
"grad_norm": 0.0003085716161876917, |
|
"learning_rate": 0.00013887421381363968, |
|
"loss": 46.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.15376736881635997, |
|
"grad_norm": 0.000396199116948992, |
|
"learning_rate": 0.00013877738649729405, |
|
"loss": 46.0, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.15390392953466936, |
|
"grad_norm": 0.0006908946088515222, |
|
"learning_rate": 0.00013868051637187144, |
|
"loss": 46.0, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.15404049025297872, |
|
"grad_norm": 0.0003736602666322142, |
|
"learning_rate": 0.00013858360354431355, |
|
"loss": 46.0, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.15417705097128812, |
|
"grad_norm": 0.0006938680890016258, |
|
"learning_rate": 0.00013848664812160925, |
|
"loss": 46.0, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.15431361168959748, |
|
"grad_norm": 0.0005900769028812647, |
|
"learning_rate": 0.00013838965021079446, |
|
"loss": 46.0, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.15445017240790687, |
|
"grad_norm": 0.002096734009683132, |
|
"learning_rate": 0.00013829260991895197, |
|
"loss": 46.0, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.15458673312621624, |
|
"grad_norm": 0.0011866495478898287, |
|
"learning_rate": 0.00013819552735321134, |
|
"loss": 46.0, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.15472329384452563, |
|
"grad_norm": 0.00037106405943632126, |
|
"learning_rate": 0.00013809840262074885, |
|
"loss": 46.0, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.154859854562835, |
|
"grad_norm": 0.0006705286214128137, |
|
"learning_rate": 0.0001380012358287873, |
|
"loss": 46.0, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.15499641528114438, |
|
"grad_norm": 0.00040015694685280323, |
|
"learning_rate": 0.0001379040270845959, |
|
"loss": 46.0, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.15513297599945375, |
|
"grad_norm": 0.0005712392157875001, |
|
"learning_rate": 0.00013780677649549025, |
|
"loss": 46.0, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.15526953671776314, |
|
"grad_norm": 0.00047067005652934313, |
|
"learning_rate": 0.00013770948416883205, |
|
"loss": 46.0, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.1554060974360725, |
|
"grad_norm": 0.0008378413622267544, |
|
"learning_rate": 0.00013761215021202916, |
|
"loss": 46.0, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.1555426581543819, |
|
"grad_norm": 0.0007883374928496778, |
|
"learning_rate": 0.00013751477473253533, |
|
"loss": 46.0, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.15567921887269126, |
|
"grad_norm": 0.0012233637971803546, |
|
"learning_rate": 0.0001374173578378502, |
|
"loss": 46.0, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.15581577959100065, |
|
"grad_norm": 0.004130385350435972, |
|
"learning_rate": 0.00013731989963551913, |
|
"loss": 46.0, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.15595234030931002, |
|
"grad_norm": 0.0029437027405947447, |
|
"learning_rate": 0.00013722240023313306, |
|
"loss": 46.0, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.1560889010276194, |
|
"grad_norm": 0.0003660391375888139, |
|
"learning_rate": 0.00013712485973832838, |
|
"loss": 46.0, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.15622546174592877, |
|
"grad_norm": 0.0020895125344395638, |
|
"learning_rate": 0.00013702727825878693, |
|
"loss": 46.0, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.15636202246423817, |
|
"grad_norm": 0.0016986053669825196, |
|
"learning_rate": 0.00013692965590223573, |
|
"loss": 46.0, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.15649858318254753, |
|
"grad_norm": 0.0005671007093042135, |
|
"learning_rate": 0.00013683199277644693, |
|
"loss": 46.0, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.15663514390085692, |
|
"grad_norm": 0.0009818869875743985, |
|
"learning_rate": 0.00013673428898923774, |
|
"loss": 46.0, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.15677170461916629, |
|
"grad_norm": 0.0004315339319873601, |
|
"learning_rate": 0.00013663654464847022, |
|
"loss": 46.0, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.15690826533747568, |
|
"grad_norm": 0.0006189457490108907, |
|
"learning_rate": 0.0001365387598620512, |
|
"loss": 46.0, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.15704482605578504, |
|
"grad_norm": 0.0007538103964179754, |
|
"learning_rate": 0.00013644093473793215, |
|
"loss": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.15718138677409443, |
|
"grad_norm": 0.0005718530155718327, |
|
"learning_rate": 0.00013634306938410911, |
|
"loss": 46.0, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.1573179474924038, |
|
"grad_norm": 0.0006374249351210892, |
|
"learning_rate": 0.00013624516390862244, |
|
"loss": 46.0, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.1574545082107132, |
|
"grad_norm": 0.0007302735466510057, |
|
"learning_rate": 0.00013614721841955692, |
|
"loss": 46.0, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.15759106892902255, |
|
"grad_norm": 0.0005098398542031646, |
|
"learning_rate": 0.00013604923302504147, |
|
"loss": 46.0, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.15772762964733195, |
|
"grad_norm": 0.0005700102774426341, |
|
"learning_rate": 0.00013595120783324902, |
|
"loss": 46.0, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.1578641903656413, |
|
"grad_norm": 0.00040146647370420396, |
|
"learning_rate": 0.00013585314295239644, |
|
"loss": 46.0, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.1580007510839507, |
|
"grad_norm": 0.0008773392182774842, |
|
"learning_rate": 0.00013575503849074444, |
|
"loss": 46.0, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.15813731180226007, |
|
"grad_norm": 0.0003678193024825305, |
|
"learning_rate": 0.0001356568945565974, |
|
"loss": 46.0, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.15827387252056946, |
|
"grad_norm": 0.0013553998433053493, |
|
"learning_rate": 0.0001355587112583033, |
|
"loss": 46.0, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.15841043323887882, |
|
"grad_norm": 0.0009887435007840395, |
|
"learning_rate": 0.00013546048870425356, |
|
"loss": 46.0, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.15854699395718821, |
|
"grad_norm": 0.0019033915596082807, |
|
"learning_rate": 0.00013536222700288303, |
|
"loss": 46.0, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.1586835546754976, |
|
"grad_norm": 0.0006128349923528731, |
|
"learning_rate": 0.00013526392626266956, |
|
"loss": 46.0, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.15882011539380697, |
|
"grad_norm": 0.0005847832653671503, |
|
"learning_rate": 0.00013516558659213432, |
|
"loss": 46.0, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.15895667611211636, |
|
"grad_norm": 0.0005258521996438503, |
|
"learning_rate": 0.00013506720809984137, |
|
"loss": 46.0, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.15909323683042573, |
|
"grad_norm": 0.00040099999750964344, |
|
"learning_rate": 0.0001349687908943976, |
|
"loss": 46.0, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.15922979754873512, |
|
"grad_norm": 0.0007036002352833748, |
|
"learning_rate": 0.0001348703350844527, |
|
"loss": 46.0, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.15936635826704448, |
|
"grad_norm": 0.00037679230445064604, |
|
"learning_rate": 0.00013477184077869892, |
|
"loss": 46.0, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.15950291898535388, |
|
"grad_norm": 0.0005223838961683214, |
|
"learning_rate": 0.000134673308085871, |
|
"loss": 46.0, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.15963947970366324, |
|
"grad_norm": 0.0008007617434486747, |
|
"learning_rate": 0.0001345747371147461, |
|
"loss": 46.0, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.15977604042197263, |
|
"grad_norm": 0.000517090258654207, |
|
"learning_rate": 0.0001344761279741437, |
|
"loss": 46.0, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.159912601140282, |
|
"grad_norm": 0.0008526128367520869, |
|
"learning_rate": 0.0001343774807729253, |
|
"loss": 46.0, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.1600491618585914, |
|
"grad_norm": 0.0011307375971227884, |
|
"learning_rate": 0.0001342787956199945, |
|
"loss": 46.0, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.16018572257690075, |
|
"grad_norm": 0.00033380292006768286, |
|
"learning_rate": 0.00013418007262429668, |
|
"loss": 46.0, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.16032228329521014, |
|
"grad_norm": 0.000278162129689008, |
|
"learning_rate": 0.00013408131189481911, |
|
"loss": 46.0, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.1604588440135195, |
|
"grad_norm": 0.00258398219011724, |
|
"learning_rate": 0.00013398251354059077, |
|
"loss": 46.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.1605954047318289, |
|
"grad_norm": 0.0006168753025121987, |
|
"learning_rate": 0.000133883677670682, |
|
"loss": 46.0, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.16073196545013826, |
|
"grad_norm": 0.00029901755624450743, |
|
"learning_rate": 0.0001337848043942047, |
|
"loss": 46.0, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.16086852616844766, |
|
"grad_norm": 0.000997790601104498, |
|
"learning_rate": 0.00013368589382031196, |
|
"loss": 46.0, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.16100508688675702, |
|
"grad_norm": 0.0006558905588462949, |
|
"learning_rate": 0.00013358694605819814, |
|
"loss": 46.0, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.1611416476050664, |
|
"grad_norm": 0.0006217307527549565, |
|
"learning_rate": 0.00013348796121709862, |
|
"loss": 46.0, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.16127820832337578, |
|
"grad_norm": 0.0004898210754618049, |
|
"learning_rate": 0.00013338893940628973, |
|
"loss": 46.0, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.16141476904168517, |
|
"grad_norm": 0.0009382545249536633, |
|
"learning_rate": 0.00013328988073508852, |
|
"loss": 46.0, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.16155132975999453, |
|
"grad_norm": 0.0006358098471537232, |
|
"learning_rate": 0.00013319078531285285, |
|
"loss": 46.0, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.16168789047830393, |
|
"grad_norm": 0.0012355463113635778, |
|
"learning_rate": 0.00013309165324898112, |
|
"loss": 46.0, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.1618244511966133, |
|
"grad_norm": 0.0005907994927838445, |
|
"learning_rate": 0.00013299248465291214, |
|
"loss": 46.0, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.16196101191492268, |
|
"grad_norm": 0.002715210895985365, |
|
"learning_rate": 0.00013289327963412513, |
|
"loss": 46.0, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.16209757263323205, |
|
"grad_norm": 0.001038241432979703, |
|
"learning_rate": 0.00013279403830213942, |
|
"loss": 46.0, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.16223413335154144, |
|
"grad_norm": 0.0015468295896425843, |
|
"learning_rate": 0.00013269476076651447, |
|
"loss": 46.0, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.1623706940698508, |
|
"grad_norm": 0.0017287740483880043, |
|
"learning_rate": 0.00013259544713684974, |
|
"loss": 46.0, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.1625072547881602, |
|
"grad_norm": 0.0018615883309394121, |
|
"learning_rate": 0.00013249609752278454, |
|
"loss": 46.0, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.16264381550646956, |
|
"grad_norm": 0.00047380104660987854, |
|
"learning_rate": 0.0001323967120339978, |
|
"loss": 46.0, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.16278037622477895, |
|
"grad_norm": 0.001483297673985362, |
|
"learning_rate": 0.00013229729078020823, |
|
"loss": 46.0, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.16291693694308831, |
|
"grad_norm": 0.0004559273656923324, |
|
"learning_rate": 0.00013219783387117385, |
|
"loss": 46.0, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.1630534976613977, |
|
"grad_norm": 0.0008615512633696198, |
|
"learning_rate": 0.00013209834141669213, |
|
"loss": 46.0, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.16319005837970707, |
|
"grad_norm": 0.0005908702732995152, |
|
"learning_rate": 0.0001319988135265998, |
|
"loss": 46.0, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.16332661909801646, |
|
"grad_norm": 0.0008730573463253677, |
|
"learning_rate": 0.00013189925031077267, |
|
"loss": 46.0, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.16346317981632583, |
|
"grad_norm": 0.0012465447653084993, |
|
"learning_rate": 0.00013179965187912554, |
|
"loss": 46.0, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.16359974053463522, |
|
"grad_norm": 0.0013986461563035846, |
|
"learning_rate": 0.00013170001834161209, |
|
"loss": 46.0, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.16373630125294458, |
|
"grad_norm": 0.0011632711393758655, |
|
"learning_rate": 0.0001316003498082248, |
|
"loss": 46.0, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.16387286197125397, |
|
"grad_norm": 0.001200903090648353, |
|
"learning_rate": 0.0001315006463889948, |
|
"loss": 46.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.16387286197125397, |
|
"eval_loss": 11.5, |
|
"eval_runtime": 20.5706, |
|
"eval_samples_per_second": 149.923, |
|
"eval_steps_per_second": 74.961, |
|
"step": 1200 |
|
} |
|
],
"logging_steps": 1,
"max_steps": 3000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 300,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 42923841650688.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}