{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 512,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00390625,
      "grad_norm": 13.52961750257624,
      "learning_rate": 3.846153846153847e-07,
      "loss": 1.2001,
      "step": 1
    },
    {
      "epoch": 0.01953125,
      "grad_norm": 14.719058439285638,
      "learning_rate": 1.9230769230769234e-06,
      "loss": 1.2458,
      "step": 5
    },
    {
      "epoch": 0.0390625,
      "grad_norm": 4.77060856046236,
      "learning_rate": 3.846153846153847e-06,
      "loss": 1.1124,
      "step": 10
    },
    {
      "epoch": 0.05859375,
      "grad_norm": 2.4602749084741933,
      "learning_rate": 5.769230769230769e-06,
      "loss": 1.0517,
      "step": 15
    },
    {
      "epoch": 0.078125,
      "grad_norm": 1.7463079572164537,
      "learning_rate": 7.692307692307694e-06,
      "loss": 0.9195,
      "step": 20
    },
    {
      "epoch": 0.09765625,
      "grad_norm": 1.3968704201975504,
      "learning_rate": 9.615384615384616e-06,
      "loss": 0.9425,
      "step": 25
    },
    {
      "epoch": 0.1171875,
      "grad_norm": 1.427533324978979,
      "learning_rate": 1.1538461538461538e-05,
      "loss": 0.93,
      "step": 30
    },
    {
      "epoch": 0.13671875,
      "grad_norm": 1.3118669922360116,
      "learning_rate": 1.3461538461538463e-05,
      "loss": 0.8991,
      "step": 35
    },
    {
      "epoch": 0.15625,
      "grad_norm": 1.2907394066527953,
      "learning_rate": 1.5384615384615387e-05,
      "loss": 0.8919,
      "step": 40
    },
    {
      "epoch": 0.17578125,
      "grad_norm": 1.4159243735379308,
      "learning_rate": 1.730769230769231e-05,
      "loss": 0.8803,
      "step": 45
    },
    {
      "epoch": 0.1953125,
      "grad_norm": 1.299112560801352,
      "learning_rate": 1.923076923076923e-05,
      "loss": 0.8959,
      "step": 50
    },
    {
      "epoch": 0.21484375,
      "grad_norm": 1.2702838813576585,
      "learning_rate": 1.99979011499924e-05,
      "loss": 0.8755,
      "step": 55
    },
    {
      "epoch": 0.234375,
      "grad_norm": 1.367440629877651,
      "learning_rate": 1.998507803482828e-05,
      "loss": 0.9024,
      "step": 60
    },
    {
      "epoch": 0.25390625,
      "grad_norm": 1.2280977464389735,
      "learning_rate": 1.996061276533154e-05,
      "loss": 0.8664,
      "step": 65
    },
    {
      "epoch": 0.2734375,
      "grad_norm": 1.3022228088323475,
      "learning_rate": 1.9924533866912017e-05,
      "loss": 0.8665,
      "step": 70
    },
    {
      "epoch": 0.29296875,
      "grad_norm": 1.2078818317069273,
      "learning_rate": 1.9876883405951378e-05,
      "loss": 0.8624,
      "step": 75
    },
    {
      "epoch": 0.3125,
      "grad_norm": 1.5644048727951139,
      "learning_rate": 1.9817716940755586e-05,
      "loss": 0.8761,
      "step": 80
    },
    {
      "epoch": 0.33203125,
      "grad_norm": 1.3636980168093404,
      "learning_rate": 1.9747103456776406e-05,
      "loss": 0.8376,
      "step": 85
    },
    {
      "epoch": 0.3515625,
      "grad_norm": 1.2165350413614253,
      "learning_rate": 1.9665125286177448e-05,
      "loss": 0.8769,
      "step": 90
    },
    {
      "epoch": 0.37109375,
      "grad_norm": 1.2457077882152825,
      "learning_rate": 1.9571878011838557e-05,
      "loss": 0.8728,
      "step": 95
    },
    {
      "epoch": 0.390625,
      "grad_norm": 2.4186928992413006,
      "learning_rate": 1.9467470355910438e-05,
      "loss": 0.8834,
      "step": 100
    },
    {
      "epoch": 0.41015625,
      "grad_norm": 1.2020347163023293,
      "learning_rate": 1.935202405304951e-05,
      "loss": 0.8585,
      "step": 105
    },
    {
      "epoch": 0.4296875,
      "grad_norm": 1.1135509900758418,
      "learning_rate": 1.922567370848072e-05,
      "loss": 0.8443,
      "step": 110
    },
    {
      "epoch": 0.44921875,
      "grad_norm": 1.190845364147945,
      "learning_rate": 1.9088566641053887e-05,
      "loss": 0.868,
      "step": 115
    },
    {
      "epoch": 0.46875,
      "grad_norm": 1.107956734795049,
      "learning_rate": 1.8940862711476515e-05,
      "loss": 0.8316,
      "step": 120
    },
    {
      "epoch": 0.48828125,
      "grad_norm": 1.2765334463268576,
      "learning_rate": 1.878273413592334e-05,
      "loss": 0.8578,
      "step": 125
    },
    {
      "epoch": 0.5078125,
      "grad_norm": 1.1648480884158616,
      "learning_rate": 1.8614365285240002e-05,
      "loss": 0.846,
      "step": 130
    },
    {
      "epoch": 0.52734375,
      "grad_norm": 1.135005532726839,
      "learning_rate": 1.8435952469974858e-05,
      "loss": 0.8615,
      "step": 135
    },
    {
      "epoch": 0.546875,
      "grad_norm": 1.2325035965433688,
      "learning_rate": 1.8247703711489684e-05,
      "loss": 0.8349,
      "step": 140
    },
    {
      "epoch": 0.56640625,
      "grad_norm": 1.0925509469327004,
      "learning_rate": 1.804983849941607e-05,
      "loss": 0.8546,
      "step": 145
    },
    {
      "epoch": 0.5859375,
      "grad_norm": 1.2488571066795477,
      "learning_rate": 1.7842587535740315e-05,
      "loss": 0.9118,
      "step": 150
    },
    {
      "epoch": 0.60546875,
      "grad_norm": 1.1937624775603797,
      "learning_rate": 1.762619246581524e-05,
      "loss": 0.8505,
      "step": 155
    },
    {
      "epoch": 0.625,
      "grad_norm": 1.1137502231777754,
      "learning_rate": 1.740090559661252e-05,
      "loss": 0.8352,
      "step": 160
    },
    {
      "epoch": 0.64453125,
      "grad_norm": 1.1828666444300104,
      "learning_rate": 1.7166989602544036e-05,
      "loss": 0.8359,
      "step": 165
    },
    {
      "epoch": 0.6640625,
      "grad_norm": 3.65203815585326,
      "learning_rate": 1.6924717219195258e-05,
      "loss": 0.8689,
      "step": 170
    },
    {
      "epoch": 0.68359375,
      "grad_norm": 1.1416859572228402,
      "learning_rate": 1.667437092532776e-05,
      "loss": 0.8472,
      "step": 175
    },
    {
      "epoch": 0.703125,
      "grad_norm": 1.1051850039812487,
      "learning_rate": 1.6416242613521612e-05,
      "loss": 0.828,
      "step": 180
    },
    {
      "epoch": 0.72265625,
      "grad_norm": 1.1362330964244478,
      "learning_rate": 1.6150633249841696e-05,
      "loss": 0.8316,
      "step": 185
    },
    {
      "epoch": 0.7421875,
      "grad_norm": 1.1663508920755614,
      "learning_rate": 1.5877852522924733e-05,
      "loss": 0.8199,
      "step": 190
    },
    {
      "epoch": 0.76171875,
      "grad_norm": 1.069723196727746,
      "learning_rate": 1.5598218482896182e-05,
      "loss": 0.8322,
      "step": 195
    },
    {
      "epoch": 0.78125,
      "grad_norm": 1.1519350666106931,
      "learning_rate": 1.5312057170538033e-05,
      "loss": 0.8329,
      "step": 200
    },
    {
      "epoch": 0.80078125,
      "grad_norm": 1.113639432195006,
      "learning_rate": 1.501970223713983e-05,
      "loss": 0.8501,
      "step": 205
    },
    {
      "epoch": 0.8203125,
      "grad_norm": 6.880127196389451,
      "learning_rate": 1.4721494555476189e-05,
      "loss": 0.8796,
      "step": 210
    },
    {
      "epoch": 0.83984375,
      "grad_norm": 1.8661887735033835,
      "learning_rate": 1.4417781822364396e-05,
      "loss": 0.8453,
      "step": 215
    },
    {
      "epoch": 0.859375,
      "grad_norm": 1.1561250264726648,
      "learning_rate": 1.4108918153265485e-05,
      "loss": 0.8785,
      "step": 220
    },
    {
      "epoch": 0.87890625,
      "grad_norm": 1.0352123961524995,
      "learning_rate": 1.379526366940142e-05,
      "loss": 0.7975,
      "step": 225
    },
    {
      "epoch": 0.8984375,
      "grad_norm": 1.1431323585880604,
      "learning_rate": 1.3477184077869892e-05,
      "loss": 0.9143,
      "step": 230
    },
    {
      "epoch": 0.91796875,
      "grad_norm": 1.0873880063235835,
      "learning_rate": 1.3155050245246171e-05,
      "loss": 0.822,
      "step": 235
    },
    {
      "epoch": 0.9375,
      "grad_norm": 1.0523607160783444,
      "learning_rate": 1.28292377651693e-05,
      "loss": 0.8323,
      "step": 240
    },
    {
      "epoch": 0.95703125,
      "grad_norm": 1.0157998886067603,
      "learning_rate": 1.250012652041669e-05,
      "loss": 0.8663,
      "step": 245
    },
    {
      "epoch": 0.9765625,
      "grad_norm": 1.050004770812735,
      "learning_rate": 1.216810023997781e-05,
      "loss": 0.795,
      "step": 250
    },
    {
      "epoch": 0.99609375,
      "grad_norm": 0.9901708666135668,
      "learning_rate": 1.1833546051643325e-05,
      "loss": 0.809,
      "step": 255
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.8800936341285706,
      "eval_runtime": 4.2726,
      "eval_samples_per_second": 38.384,
      "eval_steps_per_second": 0.702,
      "step": 256
    },
    {
      "epoch": 1.015625,
      "grad_norm": 1.6464925548656197,
      "learning_rate": 1.1496854030631443e-05,
      "loss": 0.6149,
      "step": 260
    },
    {
      "epoch": 1.03515625,
      "grad_norm": 1.072559199121631,
      "learning_rate": 1.1158416744777644e-05,
      "loss": 0.5546,
      "step": 265
    },
    {
      "epoch": 1.0546875,
      "grad_norm": 1.108994610542106,
      "learning_rate": 1.0818628796818134e-05,
      "loss": 0.5328,
      "step": 270
    },
    {
      "epoch": 1.07421875,
      "grad_norm": 1.0468024771181579,
      "learning_rate": 1.0477886364300722e-05,
      "loss": 0.5665,
      "step": 275
    },
    {
      "epoch": 1.09375,
      "grad_norm": 1.1024876273097197,
      "learning_rate": 1.013658673765951e-05,
      "loss": 0.5031,
      "step": 280
    },
    {
      "epoch": 1.11328125,
      "grad_norm": 1.0010594169346207,
      "learning_rate": 9.79512785699204e-06,
      "loss": 0.5128,
      "step": 285
    },
    {
      "epoch": 1.1328125,
      "grad_norm": 0.9826520234931142,
      "learning_rate": 9.453907848078901e-06,
      "loss": 0.5214,
      "step": 290
    },
    {
      "epoch": 1.15234375,
      "grad_norm": 1.06456378745709,
      "learning_rate": 9.113324558186922e-06,
      "loss": 0.4919,
      "step": 295
    },
    {
      "epoch": 1.171875,
      "grad_norm": 1.0193727837903817,
      "learning_rate": 8.773775092197018e-06,
      "loss": 0.5426,
      "step": 300
    },
    {
      "epoch": 1.19140625,
      "grad_norm": 1.1463010979822952,
      "learning_rate": 8.43565534959769e-06,
      "loss": 0.5146,
      "step": 305
    },
    {
      "epoch": 1.2109375,
      "grad_norm": 1.0520157917383735,
      "learning_rate": 8.099359562883931e-06,
      "loss": 0.5131,
      "step": 310
    },
    {
      "epoch": 1.23046875,
      "grad_norm": 1.1402388191576989,
      "learning_rate": 7.76527983789973e-06,
      "loss": 0.5388,
      "step": 315
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.1994848233232585,
      "learning_rate": 7.433805696660267e-06,
      "loss": 0.5114,
      "step": 320
    },
    {
      "epoch": 1.26953125,
      "grad_norm": 1.1554670397128242,
      "learning_rate": 7.105323623186595e-06,
      "loss": 0.5415,
      "step": 325
    },
    {
      "epoch": 1.2890625,
      "grad_norm": 1.0601143896880463,
      "learning_rate": 6.78021661288262e-06,
      "loss": 0.5271,
      "step": 330
    },
    {
      "epoch": 1.30859375,
      "grad_norm": 0.9948697176939006,
      "learning_rate": 6.458863725979549e-06,
      "loss": 0.5093,
      "step": 335
    },
    {
      "epoch": 1.328125,
      "grad_norm": 1.2614924364675497,
      "learning_rate": 6.141639645568646e-06,
      "loss": 0.5263,
      "step": 340
    },
    {
      "epoch": 1.34765625,
      "grad_norm": 1.0090836036415356,
      "learning_rate": 5.828914240737496e-06,
      "loss": 0.5241,
      "step": 345
    },
    {
      "epoch": 1.3671875,
      "grad_norm": 0.9710310992423488,
      "learning_rate": 5.521052135319182e-06,
      "loss": 0.5223,
      "step": 350
    },
    {
      "epoch": 1.38671875,
      "grad_norm": 0.9666264916437364,
      "learning_rate": 5.2184122827572315e-06,
      "loss": 0.522,
      "step": 355
    },
    {
      "epoch": 1.40625,
      "grad_norm": 0.9913680270139593,
      "learning_rate": 4.921347547581939e-06,
      "loss": 0.5026,
      "step": 360
    },
    {
      "epoch": 1.42578125,
      "grad_norm": 1.1019424690864423,
      "learning_rate": 4.630204293986122e-06,
      "loss": 0.5055,
      "step": 365
    },
    {
      "epoch": 1.4453125,
      "grad_norm": 1.1364708272450168,
      "learning_rate": 4.345321981979942e-06,
      "loss": 0.5126,
      "step": 370
    },
    {
      "epoch": 1.46484375,
      "grad_norm": 0.944966666142233,
      "learning_rate": 4.067032771595749e-06,
      "loss": 0.5155,
      "step": 375
    },
    {
      "epoch": 1.484375,
      "grad_norm": 0.9992299626369062,
      "learning_rate": 3.7956611356043196e-06,
      "loss": 0.5262,
      "step": 380
    },
    {
      "epoch": 1.50390625,
      "grad_norm": 2.2555525600089146,
      "learning_rate": 3.53152348119413e-06,
      "loss": 0.5274,
      "step": 385
    },
    {
      "epoch": 1.5234375,
      "grad_norm": 2.6543631759205293,
      "learning_rate": 3.2749277810547286e-06,
      "loss": 0.5407,
      "step": 390
    },
    {
      "epoch": 1.54296875,
      "grad_norm": 0.9454239654950802,
      "learning_rate": 3.0261732142943435e-06,
      "loss": 0.5088,
      "step": 395
    },
    {
      "epoch": 1.5625,
      "grad_norm": 0.9651848810153458,
      "learning_rate": 2.7855498176104435e-06,
      "loss": 0.5156,
      "step": 400
    },
    {
      "epoch": 1.58203125,
      "grad_norm": 0.9488391689639479,
      "learning_rate": 2.5533381471199138e-06,
      "loss": 0.5027,
      "step": 405
    },
    {
      "epoch": 1.6015625,
      "grad_norm": 1.0344338188963158,
      "learning_rate": 2.3298089512431744e-06,
      "loss": 0.516,
      "step": 410
    },
    {
      "epoch": 1.62109375,
      "grad_norm": 1.0757146443275445,
      "learning_rate": 2.1152228550236264e-06,
      "loss": 0.4998,
      "step": 415
    },
    {
      "epoch": 1.640625,
      "grad_norm": 1.0916629078435063,
      "learning_rate": 1.9098300562505266e-06,
      "loss": 0.5618,
      "step": 420
    },
    {
      "epoch": 1.66015625,
      "grad_norm": 0.9001642552665725,
      "learning_rate": 1.713870033739541e-06,
      "loss": 0.5004,
      "step": 425
    },
    {
      "epoch": 1.6796875,
      "grad_norm": 0.9887925721175237,
      "learning_rate": 1.5275712681111643e-06,
      "loss": 0.5118,
      "step": 430
    },
    {
      "epoch": 1.69921875,
      "grad_norm": 0.9521081586764941,
      "learning_rate": 1.3511509753925422e-06,
      "loss": 0.5313,
      "step": 435
    },
    {
      "epoch": 1.71875,
      "grad_norm": 0.9103965187812971,
      "learning_rate": 1.1848148537532845e-06,
      "loss": 0.5009,
      "step": 440
    },
    {
      "epoch": 1.73828125,
      "grad_norm": 0.9494855728059224,
      "learning_rate": 1.0287568436706208e-06,
      "loss": 0.4873,
      "step": 445
    },
    {
      "epoch": 1.7578125,
      "grad_norm": 1.0147241592824456,
      "learning_rate": 8.831589018034659e-07,
      "loss": 0.5237,
      "step": 450
    },
    {
      "epoch": 1.77734375,
      "grad_norm": 0.9625772091753605,
      "learning_rate": 7.481907888390994e-07,
      "loss": 0.5034,
      "step": 455
    },
    {
      "epoch": 1.796875,
      "grad_norm": 0.9458519608918025,
      "learning_rate": 6.240098715597975e-07,
      "loss": 0.5066,
      "step": 460
    },
    {
      "epoch": 1.81640625,
      "grad_norm": 0.9480774154256925,
      "learning_rate": 5.107609393602019e-07,
      "loss": 0.5054,
      "step": 465
    },
    {
      "epoch": 1.8359375,
      "grad_norm": 0.9585344451470228,
      "learning_rate": 4.0857603542936776e-07,
      "loss": 0.5494,
      "step": 470
    },
    {
      "epoch": 1.85546875,
      "grad_norm": 0.9741379097553984,
      "learning_rate": 3.175743027943079e-07,
      "loss": 0.5096,
      "step": 475
    },
    {
      "epoch": 1.875,
      "grad_norm": 0.9143220140212833,
      "learning_rate": 2.3786184540455449e-07,
      "loss": 0.5118,
      "step": 480
    },
    {
      "epoch": 1.89453125,
      "grad_norm": 0.9419358410206972,
      "learning_rate": 1.6953160441969707e-07,
      "loss": 0.5156,
      "step": 485
    },
    {
      "epoch": 1.9140625,
      "grad_norm": 0.9508691680531732,
      "learning_rate": 1.1266324984415266e-07,
      "loss": 0.5034,
      "step": 490
    },
    {
      "epoch": 1.93359375,
      "grad_norm": 0.9252556940433395,
      "learning_rate": 6.732308763550022e-08,
      "loss": 0.5109,
      "step": 495
    },
    {
      "epoch": 1.953125,
      "grad_norm": 0.9221461974805585,
      "learning_rate": 3.356398239470427e-08,
      "loss": 0.5186,
      "step": 500
    },
    {
      "epoch": 1.97265625,
      "grad_norm": 0.9366413757174445,
      "learning_rate": 1.142529572835227e-08,
      "loss": 0.4819,
      "step": 505
    },
    {
      "epoch": 1.9921875,
      "grad_norm": 0.896293999569403,
      "learning_rate": 9.328403547792518e-10,
      "loss": 0.5409,
      "step": 510
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.8888566493988037,
      "eval_runtime": 5.0992,
      "eval_samples_per_second": 32.162,
      "eval_steps_per_second": 0.588,
      "step": 512
    },
    {
      "epoch": 2.0,
      "step": 512,
      "total_flos": 107202383708160.0,
      "train_loss": 0.697848245035857,
      "train_runtime": 3596.6385,
      "train_samples_per_second": 9.078,
      "train_steps_per_second": 0.142
    }
  ],
  "logging_steps": 5,
  "max_steps": 512,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 107202383708160.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}