{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.037801823938005005,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00018900911969002505,
      "grad_norm": 0.012686253525316715,
      "learning_rate": 2e-05,
      "loss": 10.378,
      "step": 1
    },
    {
      "epoch": 0.00018900911969002505,
      "eval_loss": 10.380497932434082,
      "eval_runtime": 7.795,
      "eval_samples_per_second": 285.824,
      "eval_steps_per_second": 142.912,
      "step": 1
    },
    {
      "epoch": 0.0003780182393800501,
      "grad_norm": 0.014491567388176918,
      "learning_rate": 4e-05,
      "loss": 10.3803,
      "step": 2
    },
    {
      "epoch": 0.0005670273590700751,
      "grad_norm": 0.012410955503582954,
      "learning_rate": 6e-05,
      "loss": 10.3807,
      "step": 3
    },
    {
      "epoch": 0.0007560364787601002,
      "grad_norm": 0.01625671051442623,
      "learning_rate": 8e-05,
      "loss": 10.381,
      "step": 4
    },
    {
      "epoch": 0.0009450455984501252,
      "grad_norm": 0.01246438268572092,
      "learning_rate": 0.0001,
      "loss": 10.3789,
      "step": 5
    },
    {
      "epoch": 0.0011340547181401502,
      "grad_norm": 0.012397886253893375,
      "learning_rate": 0.00012,
      "loss": 10.3793,
      "step": 6
    },
    {
      "epoch": 0.0013230638378301752,
      "grad_norm": 0.012805677950382233,
      "learning_rate": 0.00014,
      "loss": 10.3823,
      "step": 7
    },
    {
      "epoch": 0.0015120729575202004,
      "grad_norm": 0.015158340334892273,
      "learning_rate": 0.00016,
      "loss": 10.3804,
      "step": 8
    },
    {
      "epoch": 0.0017010820772102254,
      "grad_norm": 0.01396258920431137,
      "learning_rate": 0.00018,
      "loss": 10.38,
      "step": 9
    },
    {
      "epoch": 0.0018900911969002504,
      "grad_norm": 0.01365083921700716,
      "learning_rate": 0.0002,
      "loss": 10.3809,
      "step": 10
    },
    {
      "epoch": 0.0020791003165902757,
      "grad_norm": 0.014954385347664356,
      "learning_rate": 0.0001999863304992469,
      "loss": 10.378,
      "step": 11
    },
    {
      "epoch": 0.0022681094362803004,
      "grad_norm": 0.01471993513405323,
      "learning_rate": 0.00019994532573409262,
      "loss": 10.3826,
      "step": 12
    },
    {
      "epoch": 0.0024571185559703257,
      "grad_norm": 0.013966689817607403,
      "learning_rate": 0.00019987699691483048,
      "loss": 10.381,
      "step": 13
    },
    {
      "epoch": 0.0026461276756603504,
      "grad_norm": 0.012581618502736092,
      "learning_rate": 0.00019978136272187747,
      "loss": 10.3835,
      "step": 14
    },
    {
      "epoch": 0.0028351367953503757,
      "grad_norm": 0.01435596588999033,
      "learning_rate": 0.000199658449300667,
      "loss": 10.3825,
      "step": 15
    },
    {
      "epoch": 0.003024145915040401,
      "grad_norm": 0.01442666631191969,
      "learning_rate": 0.00019950829025450114,
      "loss": 10.3798,
      "step": 16
    },
    {
      "epoch": 0.0032131550347304257,
      "grad_norm": 0.011960881762206554,
      "learning_rate": 0.00019933092663536382,
      "loss": 10.3795,
      "step": 17
    },
    {
      "epoch": 0.003402164154420451,
      "grad_norm": 0.017339341342449188,
      "learning_rate": 0.00019912640693269752,
      "loss": 10.3815,
      "step": 18
    },
    {
      "epoch": 0.0035911732741104757,
      "grad_norm": 0.014264927245676517,
      "learning_rate": 0.00019889478706014687,
      "loss": 10.3823,
      "step": 19
    },
    {
      "epoch": 0.003780182393800501,
      "grad_norm": 0.013290469534695148,
      "learning_rate": 0.00019863613034027224,
      "loss": 10.3805,
      "step": 20
    },
    {
      "epoch": 0.003969191513490526,
      "grad_norm": 0.012597162276506424,
      "learning_rate": 0.00019835050748723824,
      "loss": 10.3767,
      "step": 21
    },
    {
      "epoch": 0.004158200633180551,
      "grad_norm": 0.014128554612398148,
      "learning_rate": 0.00019803799658748094,
      "loss": 10.3819,
      "step": 22
    },
    {
      "epoch": 0.004347209752870576,
      "grad_norm": 0.011865226551890373,
      "learning_rate": 0.00019769868307835994,
      "loss": 10.3771,
      "step": 23
    },
    {
      "epoch": 0.004536218872560601,
      "grad_norm": 0.013639312237501144,
      "learning_rate": 0.0001973326597248006,
      "loss": 10.3778,
      "step": 24
    },
    {
      "epoch": 0.004725227992250626,
      "grad_norm": 0.0164469163864851,
      "learning_rate": 0.00019694002659393305,
      "loss": 10.3829,
      "step": 25
    },
    {
      "epoch": 0.004914237111940651,
      "grad_norm": 0.01444909255951643,
      "learning_rate": 0.00019652089102773488,
      "loss": 10.3794,
      "step": 26
    },
    {
      "epoch": 0.005103246231630676,
      "grad_norm": 0.014745713211596012,
      "learning_rate": 0.00019607536761368484,
      "loss": 10.3777,
      "step": 27
    },
    {
      "epoch": 0.005292255351320701,
      "grad_norm": 0.011052118614315987,
      "learning_rate": 0.00019560357815343577,
      "loss": 10.3803,
      "step": 28
    },
    {
      "epoch": 0.0054812644710107265,
      "grad_norm": 0.013586455024778843,
      "learning_rate": 0.00019510565162951537,
      "loss": 10.3795,
      "step": 29
    },
    {
      "epoch": 0.005670273590700751,
      "grad_norm": 0.012791085056960583,
      "learning_rate": 0.00019458172417006347,
      "loss": 10.3785,
      "step": 30
    },
    {
      "epoch": 0.005859282710390776,
      "grad_norm": 0.016092879697680473,
      "learning_rate": 0.00019403193901161613,
      "loss": 10.3789,
      "step": 31
    },
    {
      "epoch": 0.006048291830080802,
      "grad_norm": 0.016704507172107697,
      "learning_rate": 0.0001934564464599461,
      "loss": 10.3781,
      "step": 32
    },
    {
      "epoch": 0.0062373009497708265,
      "grad_norm": 0.014680329710245132,
      "learning_rate": 0.00019285540384897073,
      "loss": 10.3804,
      "step": 33
    },
    {
      "epoch": 0.006426310069460851,
      "grad_norm": 0.01731293648481369,
      "learning_rate": 0.00019222897549773848,
      "loss": 10.3789,
      "step": 34
    },
    {
      "epoch": 0.006615319189150876,
      "grad_norm": 0.015626097097992897,
      "learning_rate": 0.00019157733266550575,
      "loss": 10.3798,
      "step": 35
    },
    {
      "epoch": 0.006804328308840902,
      "grad_norm": 0.014310219325125217,
      "learning_rate": 0.00019090065350491626,
      "loss": 10.3773,
      "step": 36
    },
    {
      "epoch": 0.0069933374285309265,
      "grad_norm": 0.015803059563040733,
      "learning_rate": 0.00019019912301329592,
      "loss": 10.3796,
      "step": 37
    },
    {
      "epoch": 0.007182346548220951,
      "grad_norm": 0.015785884112119675,
      "learning_rate": 0.00018947293298207635,
      "loss": 10.3788,
      "step": 38
    },
    {
      "epoch": 0.007371355667910977,
      "grad_norm": 0.015653641894459724,
      "learning_rate": 0.0001887222819443612,
      "loss": 10.3798,
      "step": 39
    },
    {
      "epoch": 0.007560364787601002,
      "grad_norm": 0.01727336086332798,
      "learning_rate": 0.0001879473751206489,
      "loss": 10.3811,
      "step": 40
    },
    {
      "epoch": 0.0077493739072910265,
      "grad_norm": 0.01792372763156891,
      "learning_rate": 0.00018714842436272773,
      "loss": 10.3778,
      "step": 41
    },
    {
      "epoch": 0.007938383026981051,
      "grad_norm": 0.013475922867655754,
      "learning_rate": 0.00018632564809575742,
      "loss": 10.3802,
      "step": 42
    },
    {
      "epoch": 0.008127392146671077,
      "grad_norm": 0.014228393323719501,
      "learning_rate": 0.0001854792712585539,
      "loss": 10.3804,
      "step": 43
    },
    {
      "epoch": 0.008316401266361103,
      "grad_norm": 0.022298606112599373,
      "learning_rate": 0.00018460952524209355,
      "loss": 10.3762,
      "step": 44
    },
    {
      "epoch": 0.008505410386051127,
      "grad_norm": 0.015143272466957569,
      "learning_rate": 0.00018371664782625287,
      "loss": 10.3791,
      "step": 45
    },
    {
      "epoch": 0.008694419505741152,
      "grad_norm": 0.017378278076648712,
      "learning_rate": 0.00018280088311480201,
      "loss": 10.3784,
      "step": 46
    },
    {
      "epoch": 0.008883428625431178,
      "grad_norm": 0.01839020662009716,
      "learning_rate": 0.00018186248146866927,
      "loss": 10.378,
      "step": 47
    },
    {
      "epoch": 0.009072437745121202,
      "grad_norm": 0.0227019265294075,
      "learning_rate": 0.00018090169943749476,
      "loss": 10.3784,
      "step": 48
    },
    {
      "epoch": 0.009261446864811227,
      "grad_norm": 0.01824440062046051,
      "learning_rate": 0.0001799187996894925,
      "loss": 10.3779,
      "step": 49
    },
    {
      "epoch": 0.009450455984501251,
      "grad_norm": 0.021518880501389503,
      "learning_rate": 0.00017891405093963938,
      "loss": 10.3767,
      "step": 50
    },
    {
      "epoch": 0.009450455984501251,
      "eval_loss": 10.377639770507812,
      "eval_runtime": 7.9159,
      "eval_samples_per_second": 281.459,
      "eval_steps_per_second": 140.729,
      "step": 50
    },
    {
      "epoch": 0.009639465104191277,
      "grad_norm": 0.019554248079657555,
      "learning_rate": 0.00017788772787621126,
      "loss": 10.3756,
      "step": 51
    },
    {
      "epoch": 0.009828474223881303,
      "grad_norm": 0.018241645768284798,
      "learning_rate": 0.00017684011108568592,
      "loss": 10.3744,
      "step": 52
    },
    {
      "epoch": 0.010017483343571327,
      "grad_norm": 0.018672486767172813,
      "learning_rate": 0.0001757714869760335,
      "loss": 10.379,
      "step": 53
    },
    {
      "epoch": 0.010206492463261352,
      "grad_norm": 0.02010202221572399,
      "learning_rate": 0.0001746821476984154,
      "loss": 10.3781,
      "step": 54
    },
    {
      "epoch": 0.010395501582951378,
      "grad_norm": 0.024531200528144836,
      "learning_rate": 0.00017357239106731317,
      "loss": 10.3781,
      "step": 55
    },
    {
      "epoch": 0.010584510702641402,
      "grad_norm": 0.019959786906838417,
      "learning_rate": 0.00017244252047910892,
      "loss": 10.377,
      "step": 56
    },
    {
      "epoch": 0.010773519822331427,
      "grad_norm": 0.024574976414442062,
      "learning_rate": 0.00017129284482913972,
      "loss": 10.3771,
      "step": 57
    },
    {
      "epoch": 0.010962528942021453,
      "grad_norm": 0.02337220311164856,
      "learning_rate": 0.00017012367842724887,
      "loss": 10.3783,
      "step": 58
    },
    {
      "epoch": 0.011151538061711477,
      "grad_norm": 0.024848444387316704,
      "learning_rate": 0.0001689353409118566,
      "loss": 10.3754,
      "step": 59
    },
    {
      "epoch": 0.011340547181401503,
      "grad_norm": 0.02240614965558052,
      "learning_rate": 0.00016772815716257412,
      "loss": 10.376,
      "step": 60
    },
    {
      "epoch": 0.011529556301091528,
      "grad_norm": 0.029676686972379684,
      "learning_rate": 0.0001665024572113848,
      "loss": 10.3771,
      "step": 61
    },
    {
      "epoch": 0.011718565420781552,
      "grad_norm": 0.029000744223594666,
      "learning_rate": 0.00016525857615241687,
      "loss": 10.3737,
      "step": 62
    },
    {
      "epoch": 0.011907574540471578,
      "grad_norm": 0.029285695403814316,
      "learning_rate": 0.00016399685405033167,
      "loss": 10.376,
      "step": 63
    },
    {
      "epoch": 0.012096583660161603,
      "grad_norm": 0.023476559668779373,
      "learning_rate": 0.0001627176358473537,
      "loss": 10.3757,
      "step": 64
    },
    {
      "epoch": 0.012285592779851627,
      "grad_norm": 0.024431871250271797,
      "learning_rate": 0.0001614212712689668,
      "loss": 10.3758,
      "step": 65
    },
    {
      "epoch": 0.012474601899541653,
      "grad_norm": 0.024970779195427895,
      "learning_rate": 0.00016010811472830252,
      "loss": 10.3764,
      "step": 66
    },
    {
      "epoch": 0.012663611019231679,
      "grad_norm": 0.03543892130255699,
      "learning_rate": 0.00015877852522924732,
      "loss": 10.3729,
      "step": 67
    },
    {
      "epoch": 0.012852620138921703,
      "grad_norm": 0.031035298481583595,
      "learning_rate": 0.00015743286626829437,
      "loss": 10.3762,
      "step": 68
    },
    {
      "epoch": 0.013041629258611728,
      "grad_norm": 0.030213864520192146,
      "learning_rate": 0.0001560715057351673,
      "loss": 10.3742,
      "step": 69
    },
    {
      "epoch": 0.013230638378301752,
      "grad_norm": 0.03963561728596687,
      "learning_rate": 0.00015469481581224272,
      "loss": 10.3749,
      "step": 70
    },
    {
      "epoch": 0.013419647497991778,
      "grad_norm": 0.029410356655716896,
      "learning_rate": 0.0001533031728727994,
      "loss": 10.3749,
      "step": 71
    },
    {
      "epoch": 0.013608656617681803,
      "grad_norm": 0.03572849556803703,
      "learning_rate": 0.00015189695737812152,
      "loss": 10.3748,
      "step": 72
    },
    {
      "epoch": 0.013797665737371827,
      "grad_norm": 0.036424197256565094,
      "learning_rate": 0.0001504765537734844,
      "loss": 10.3722,
      "step": 73
    },
    {
      "epoch": 0.013986674857061853,
      "grad_norm": 0.03506350517272949,
      "learning_rate": 0.00014904235038305083,
      "loss": 10.3775,
      "step": 74
    },
    {
      "epoch": 0.014175683976751879,
      "grad_norm": 0.02972862310707569,
      "learning_rate": 0.00014759473930370736,
      "loss": 10.3755,
      "step": 75
    },
    {
      "epoch": 0.014364693096441903,
      "grad_norm": 0.033316999673843384,
      "learning_rate": 0.0001461341162978688,
      "loss": 10.3737,
      "step": 76
    },
    {
      "epoch": 0.014553702216131928,
      "grad_norm": 0.04248441383242607,
      "learning_rate": 0.00014466088068528068,
      "loss": 10.3734,
      "step": 77
    },
    {
      "epoch": 0.014742711335821954,
      "grad_norm": 0.05600935220718384,
      "learning_rate": 0.00014317543523384928,
      "loss": 10.371,
      "step": 78
    },
    {
      "epoch": 0.014931720455511978,
      "grad_norm": 0.04100443795323372,
      "learning_rate": 0.00014167818604952906,
      "loss": 10.371,
      "step": 79
    },
    {
      "epoch": 0.015120729575202004,
      "grad_norm": 0.03395973518490791,
      "learning_rate": 0.00014016954246529696,
      "loss": 10.3753,
      "step": 80
    },
    {
      "epoch": 0.01530973869489203,
      "grad_norm": 0.059643857181072235,
      "learning_rate": 0.00013864991692924523,
      "loss": 10.3731,
      "step": 81
    },
    {
      "epoch": 0.015498747814582053,
      "grad_norm": 0.05056724324822426,
      "learning_rate": 0.00013711972489182208,
      "loss": 10.3712,
      "step": 82
    },
    {
      "epoch": 0.015687756934272077,
      "grad_norm": 0.042414236813783646,
      "learning_rate": 0.00013557938469225167,
      "loss": 10.3717,
      "step": 83
    },
    {
      "epoch": 0.015876766053962103,
      "grad_norm": 0.054200392216444016,
      "learning_rate": 0.00013402931744416433,
      "loss": 10.3728,
      "step": 84
    },
    {
      "epoch": 0.01606577517365213,
      "grad_norm": 0.07007519900798798,
      "learning_rate": 0.00013246994692046836,
      "loss": 10.3683,
      "step": 85
    },
    {
      "epoch": 0.016254784293342154,
      "grad_norm": 0.054717712104320526,
      "learning_rate": 0.00013090169943749476,
      "loss": 10.3694,
      "step": 86
    },
    {
      "epoch": 0.01644379341303218,
      "grad_norm": 0.05400513857603073,
      "learning_rate": 0.0001293250037384465,
      "loss": 10.3693,
      "step": 87
    },
    {
      "epoch": 0.016632802532722205,
      "grad_norm": 0.059751372784376144,
      "learning_rate": 0.00012774029087618446,
      "loss": 10.3706,
      "step": 88
    },
    {
      "epoch": 0.016821811652412227,
      "grad_norm": 0.04886062070727348,
      "learning_rate": 0.00012614799409538198,
      "loss": 10.3715,
      "step": 89
    },
    {
      "epoch": 0.017010820772102253,
      "grad_norm": 0.06151995435357094,
      "learning_rate": 0.00012454854871407994,
      "loss": 10.3682,
      "step": 90
    },
    {
      "epoch": 0.01719982989179228,
      "grad_norm": 0.06826233118772507,
      "learning_rate": 0.00012294239200467516,
      "loss": 10.3725,
      "step": 91
    },
    {
      "epoch": 0.017388839011482304,
      "grad_norm": 0.05669957771897316,
      "learning_rate": 0.0001213299630743747,
      "loss": 10.3681,
      "step": 92
    },
    {
      "epoch": 0.01757784813117233,
      "grad_norm": 0.06860942393541336,
      "learning_rate": 0.00011971170274514802,
      "loss": 10.3671,
      "step": 93
    },
    {
      "epoch": 0.017766857250862356,
      "grad_norm": 0.06745350360870361,
      "learning_rate": 0.000118088053433211,
      "loss": 10.3681,
      "step": 94
    },
    {
      "epoch": 0.017955866370552378,
      "grad_norm": 0.059718843549489975,
      "learning_rate": 0.00011645945902807341,
      "loss": 10.3688,
      "step": 95
    },
    {
      "epoch": 0.018144875490242404,
      "grad_norm": 0.07292637974023819,
      "learning_rate": 0.0001148263647711842,
      "loss": 10.367,
      "step": 96
    },
    {
      "epoch": 0.01833388460993243,
      "grad_norm": 0.055075109004974365,
      "learning_rate": 0.00011318921713420691,
      "loss": 10.3684,
      "step": 97
    },
    {
      "epoch": 0.018522893729622455,
      "grad_norm": 0.057356979697942734,
      "learning_rate": 0.00011154846369695863,
      "loss": 10.367,
      "step": 98
    },
    {
      "epoch": 0.01871190284931248,
      "grad_norm": 0.08173134177923203,
      "learning_rate": 0.0001099045530250463,
      "loss": 10.3678,
      "step": 99
    },
    {
      "epoch": 0.018900911969002503,
      "grad_norm": 0.05882032588124275,
      "learning_rate": 0.00010825793454723325,
      "loss": 10.3677,
      "step": 100
    },
    {
      "epoch": 0.018900911969002503,
      "eval_loss": 10.365761756896973,
      "eval_runtime": 7.959,
      "eval_samples_per_second": 279.935,
      "eval_steps_per_second": 139.967,
      "step": 100
    },
    {
      "epoch": 0.01908992108869253,
      "grad_norm": 0.08496179431676865,
      "learning_rate": 0.00010660905843256994,
      "loss": 10.3629,
      "step": 101
    },
    {
      "epoch": 0.019278930208382554,
      "grad_norm": 0.06867600232362747,
      "learning_rate": 0.00010495837546732224,
      "loss": 10.3636,
      "step": 102
    },
    {
      "epoch": 0.01946793932807258,
      "grad_norm": 0.07961437106132507,
      "learning_rate": 0.00010330633693173082,
      "loss": 10.3635,
      "step": 103
    },
    {
      "epoch": 0.019656948447762605,
      "grad_norm": 0.08998848497867584,
      "learning_rate": 0.00010165339447663587,
      "loss": 10.3633,
      "step": 104
    },
    {
      "epoch": 0.01984595756745263,
      "grad_norm": 0.06109720841050148,
      "learning_rate": 0.0001,
      "loss": 10.3643,
      "step": 105
    },
    {
      "epoch": 0.020034966687142653,
      "grad_norm": 0.07281994819641113,
      "learning_rate": 9.834660552336415e-05,
      "loss": 10.3632,
      "step": 106
    },
    {
      "epoch": 0.02022397580683268,
      "grad_norm": 0.06789088249206543,
      "learning_rate": 9.669366306826919e-05,
      "loss": 10.3608,
      "step": 107
    },
    {
      "epoch": 0.020412984926522704,
      "grad_norm": 0.06652762740850449,
      "learning_rate": 9.504162453267777e-05,
      "loss": 10.3611,
      "step": 108
    },
    {
      "epoch": 0.02060199404621273,
      "grad_norm": 0.060695480555295944,
      "learning_rate": 9.339094156743007e-05,
      "loss": 10.3631,
      "step": 109
    },
    {
      "epoch": 0.020791003165902756,
      "grad_norm": 0.07064124196767807,
      "learning_rate": 9.174206545276677e-05,
      "loss": 10.3618,
      "step": 110
    },
    {
      "epoch": 0.02098001228559278,
      "grad_norm": 0.06326396763324738,
      "learning_rate": 9.009544697495374e-05,
      "loss": 10.364,
      "step": 111
    },
    {
      "epoch": 0.021169021405282804,
      "grad_norm": 0.0698811262845993,
      "learning_rate": 8.845153630304139e-05,
      "loss": 10.3626,
      "step": 112
    },
    {
      "epoch": 0.02135803052497283,
      "grad_norm": 0.08220046013593674,
      "learning_rate": 8.681078286579311e-05,
      "loss": 10.3631,
      "step": 113
    },
    {
      "epoch": 0.021547039644662855,
      "grad_norm": 0.060157131403684616,
      "learning_rate": 8.517363522881579e-05,
      "loss": 10.3617,
      "step": 114
    },
    {
      "epoch": 0.02173604876435288,
      "grad_norm": 0.07724905014038086,
      "learning_rate": 8.35405409719266e-05,
      "loss": 10.3604,
      "step": 115
    },
    {
      "epoch": 0.021925057884042906,
      "grad_norm": 0.0626760870218277,
      "learning_rate": 8.191194656678904e-05,
      "loss": 10.3616,
      "step": 116
    },
    {
      "epoch": 0.022114067003732932,
      "grad_norm": 0.07387042045593262,
      "learning_rate": 8.028829725485199e-05,
      "loss": 10.3615,
      "step": 117
    },
    {
      "epoch": 0.022303076123422954,
      "grad_norm": 0.07770082354545593,
      "learning_rate": 7.867003692562534e-05,
      "loss": 10.3611,
      "step": 118
    },
    {
      "epoch": 0.02249208524311298,
      "grad_norm": 0.06714287400245667,
      "learning_rate": 7.705760799532485e-05,
      "loss": 10.3588,
      "step": 119
    },
    {
      "epoch": 0.022681094362803005,
      "grad_norm": 0.06426379829645157,
      "learning_rate": 7.54514512859201e-05,
      "loss": 10.3609,
      "step": 120
    },
    {
      "epoch": 0.02287010348249303,
      "grad_norm": 0.06445758044719696,
      "learning_rate": 7.385200590461803e-05,
      "loss": 10.3584,
      "step": 121
    },
    {
      "epoch": 0.023059112602183057,
      "grad_norm": 0.08041175454854965,
      "learning_rate": 7.225970912381556e-05,
      "loss": 10.3583,
      "step": 122
    },
    {
      "epoch": 0.02324812172187308,
      "grad_norm": 0.06840039044618607,
      "learning_rate": 7.067499626155354e-05,
      "loss": 10.3619,
      "step": 123
    },
    {
      "epoch": 0.023437130841563104,
      "grad_norm": 0.06949985772371292,
      "learning_rate": 6.909830056250527e-05,
      "loss": 10.3595,
      "step": 124
    },
    {
      "epoch": 0.02362613996125313,
      "grad_norm": 0.07371090352535248,
      "learning_rate": 6.753005307953167e-05,
      "loss": 10.3602,
      "step": 125
    },
    {
      "epoch": 0.023815149080943156,
      "grad_norm": 0.059921570122241974,
      "learning_rate": 6.59706825558357e-05,
      "loss": 10.36,
      "step": 126
    },
    {
      "epoch": 0.02400415820063318,
      "grad_norm": 0.056925393640995026,
      "learning_rate": 6.442061530774834e-05,
      "loss": 10.3556,
      "step": 127
    },
    {
      "epoch": 0.024193167320323207,
      "grad_norm": 0.06497975438833237,
      "learning_rate": 6.28802751081779e-05,
      "loss": 10.3599,
      "step": 128
    },
    {
      "epoch": 0.02438217644001323,
      "grad_norm": 0.0650826096534729,
      "learning_rate": 6.135008307075481e-05,
      "loss": 10.3586,
      "step": 129
    },
    {
      "epoch": 0.024571185559703255,
      "grad_norm": 0.07630787789821625,
      "learning_rate": 5.983045753470308e-05,
      "loss": 10.3565,
      "step": 130
    },
    {
      "epoch": 0.02476019467939328,
      "grad_norm": 0.05852606147527695,
      "learning_rate": 5.832181395047098e-05,
      "loss": 10.3634,
      "step": 131
    },
    {
      "epoch": 0.024949203799083306,
      "grad_norm": 0.0687006339430809,
      "learning_rate": 5.6824564766150726e-05,
      "loss": 10.3579,
      "step": 132
    },
    {
      "epoch": 0.025138212918773332,
      "grad_norm": 0.0733431875705719,
      "learning_rate": 5.533911931471936e-05,
      "loss": 10.355,
      "step": 133
    },
    {
      "epoch": 0.025327222038463357,
      "grad_norm": 0.05864822492003441,
      "learning_rate": 5.386588370213124e-05,
      "loss": 10.3593,
      "step": 134
    },
    {
      "epoch": 0.02551623115815338,
      "grad_norm": 0.055226318538188934,
      "learning_rate": 5.240526069629265e-05,
      "loss": 10.359,
      "step": 135
    },
    {
      "epoch": 0.025705240277843405,
      "grad_norm": 0.07053076475858688,
      "learning_rate": 5.095764961694922e-05,
      "loss": 10.3572,
      "step": 136
    },
    {
      "epoch": 0.02589424939753343,
      "grad_norm": 0.07019758224487305,
      "learning_rate": 4.952344622651566e-05,
      "loss": 10.3572,
      "step": 137
    },
    {
      "epoch": 0.026083258517223457,
      "grad_norm": 0.06227070838212967,
      "learning_rate": 4.810304262187852e-05,
      "loss": 10.3573,
      "step": 138
    },
    {
      "epoch": 0.026272267636913482,
      "grad_norm": 0.06058792397379875,
      "learning_rate": 4.669682712720065e-05,
      "loss": 10.3586,
      "step": 139
    },
    {
      "epoch": 0.026461276756603504,
      "grad_norm": 0.06949535012245178,
      "learning_rate": 4.530518418775733e-05,
      "loss": 10.356,
      "step": 140
    },
    {
      "epoch": 0.02665028587629353,
      "grad_norm": 0.07298791408538818,
      "learning_rate": 4.392849426483274e-05,
      "loss": 10.3584,
      "step": 141
    },
    {
      "epoch": 0.026839294995983556,
      "grad_norm": 0.05652182549238205,
      "learning_rate": 4.256713373170564e-05,
      "loss": 10.3584,
      "step": 142
    },
    {
      "epoch": 0.02702830411567358,
      "grad_norm": 0.05551915243268013,
      "learning_rate": 4.12214747707527e-05,
      "loss": 10.3588,
      "step": 143
    },
    {
      "epoch": 0.027217313235363607,
      "grad_norm": 0.05911076441407204,
      "learning_rate": 3.9891885271697496e-05,
      "loss": 10.3593,
      "step": 144
    },
    {
      "epoch": 0.027406322355053633,
      "grad_norm": 0.054930880665779114,
      "learning_rate": 3.857872873103322e-05,
      "loss": 10.3573,
      "step": 145
    },
    {
      "epoch": 0.027595331474743655,
      "grad_norm": 0.05678775534033775,
      "learning_rate": 3.7282364152646297e-05,
      "loss": 10.3593,
      "step": 146
    },
    {
      "epoch": 0.02778434059443368,
      "grad_norm": 0.06021711975336075,
      "learning_rate": 3.600314594966834e-05,
      "loss": 10.3568,
      "step": 147
    },
    {
      "epoch": 0.027973349714123706,
      "grad_norm": 0.05524953454732895,
      "learning_rate": 3.4741423847583134e-05,
      "loss": 10.3577,
      "step": 148
    },
    {
      "epoch": 0.028162358833813732,
      "grad_norm": 0.056392621248960495,
      "learning_rate": 3.349754278861517e-05,
      "loss": 10.3564,
      "step": 149
    },
    {
      "epoch": 0.028351367953503757,
      "grad_norm": 0.047636594623327255,
      "learning_rate": 3.227184283742591e-05,
      "loss": 10.355,
      "step": 150
    },
    {
      "epoch": 0.028351367953503757,
      "eval_loss": 10.35706901550293,
      "eval_runtime": 7.9784,
      "eval_samples_per_second": 279.253,
      "eval_steps_per_second": 139.626,
      "step": 150
    },
    {
      "epoch": 0.028540377073193783,
      "grad_norm": 0.06216558441519737,
      "learning_rate": 3.106465908814342e-05,
      "loss": 10.3635,
      "step": 151
    },
    {
      "epoch": 0.028729386192883805,
      "grad_norm": 0.05615238845348358,
      "learning_rate": 2.9876321572751144e-05,
      "loss": 10.3545,
      "step": 152
    },
    {
      "epoch": 0.02891839531257383,
      "grad_norm": 0.057838547974824905,
      "learning_rate": 2.87071551708603e-05,
      "loss": 10.3583,
      "step": 153
    },
    {
      "epoch": 0.029107404432263857,
      "grad_norm": 0.052986498922109604,
      "learning_rate": 2.7557479520891104e-05,
      "loss": 10.3552,
      "step": 154
    },
    {
      "epoch": 0.029296413551953882,
      "grad_norm": 0.055312514305114746,
      "learning_rate": 2.6427608932686843e-05,
      "loss": 10.356,
      "step": 155
    },
    {
      "epoch": 0.029485422671643908,
      "grad_norm": 0.05807175859808922,
      "learning_rate": 2.5317852301584643e-05,
      "loss": 10.3565,
      "step": 156
    },
    {
      "epoch": 0.029674431791333934,
      "grad_norm": 0.06103501841425896,
      "learning_rate": 2.422851302396655e-05,
      "loss": 10.3547,
      "step": 157
    },
    {
      "epoch": 0.029863440911023956,
      "grad_norm": 0.05311097204685211,
      "learning_rate": 2.315988891431412e-05,
      "loss": 10.355,
      "step": 158
    },
    {
      "epoch": 0.03005245003071398,
      "grad_norm": 0.05598808079957962,
      "learning_rate": 2.2112272123788768e-05,
      "loss": 10.3571,
      "step": 159
    },
    {
      "epoch": 0.030241459150404007,
      "grad_norm": 0.05674521625041962,
      "learning_rate": 2.1085949060360654e-05,
      "loss": 10.3537,
      "step": 160
    },
    {
      "epoch": 0.030430468270094033,
      "grad_norm": 0.05536748096346855,
      "learning_rate": 2.008120031050753e-05,
      "loss": 10.3545,
      "step": 161
    },
    {
      "epoch": 0.03061947738978406,
      "grad_norm": 0.051667895168066025,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 10.3562,
      "step": 162
    },
    {
      "epoch": 0.03080848650947408,
      "grad_norm": 0.05974581092596054,
      "learning_rate": 1.8137518531330767e-05,
      "loss": 10.3538,
      "step": 163
    },
    {
      "epoch": 0.030997495629164106,
      "grad_norm": 0.05384739860892296,
      "learning_rate": 1.7199116885197995e-05,
      "loss": 10.3574,
      "step": 164
    },
    {
      "epoch": 0.031186504748854132,
      "grad_norm": 0.06221851706504822,
      "learning_rate": 1.6283352173747145e-05,
      "loss": 10.3545,
      "step": 165
    },
    {
      "epoch": 0.031375513868544154,
      "grad_norm": 0.05726971849799156,
      "learning_rate": 1.5390474757906446e-05,
      "loss": 10.3546,
      "step": 166
    },
    {
      "epoch": 0.03156452298823418,
      "grad_norm": 0.061142805963754654,
      "learning_rate": 1.4520728741446089e-05,
      "loss": 10.3572,
      "step": 167
    },
    {
      "epoch": 0.031753532107924205,
      "grad_norm": 0.055892013013362885,
      "learning_rate": 1.3674351904242611e-05,
      "loss": 10.3557,
      "step": 168
    },
    {
      "epoch": 0.03194254122761423,
      "grad_norm": 0.04323485121130943,
      "learning_rate": 1.2851575637272262e-05,
      "loss": 10.3557,
      "step": 169
    },
    {
      "epoch": 0.03213155034730426,
      "grad_norm": 0.060065269470214844,
      "learning_rate": 1.2052624879351104e-05,
      "loss": 10.358,
      "step": 170
    },
    {
      "epoch": 0.03232055946699428,
      "grad_norm": 0.05526785925030708,
      "learning_rate": 1.1277718055638819e-05,
      "loss": 10.3573,
      "step": 171
    },
    {
      "epoch": 0.03250956858668431,
      "grad_norm": 0.05861446261405945,
      "learning_rate": 1.0527067017923654e-05,
      "loss": 10.3558,
      "step": 172
    },
    {
      "epoch": 0.032698577706374334,
      "grad_norm": 0.04735025390982628,
      "learning_rate": 9.80087698670411e-06,
      "loss": 10.36,
      "step": 173
    },
    {
      "epoch": 0.03288758682606436,
      "grad_norm": 0.05781494081020355,
      "learning_rate": 9.09934649508375e-06,
      "loss": 10.3573,
      "step": 174
    },
    {
      "epoch": 0.033076595945754385,
      "grad_norm": 0.050222914665937424,
      "learning_rate": 8.422667334494249e-06,
      "loss": 10.3582,
      "step": 175
    },
    {
      "epoch": 0.03326560506544441,
      "grad_norm": 0.048023052513599396,
      "learning_rate": 7.771024502261526e-06,
      "loss": 10.3555,
      "step": 176
    },
    {
      "epoch": 0.033454614185134436,
      "grad_norm": 0.05381546914577484,
      "learning_rate": 7.144596151029303e-06,
      "loss": 10.3539,
      "step": 177
    },
    {
      "epoch": 0.033643623304824455,
      "grad_norm": 0.047901567071676254,
      "learning_rate": 6.543553540053926e-06,
      "loss": 10.3579,
      "step": 178
    },
    {
      "epoch": 0.03383263242451448,
      "grad_norm": 0.06044565513730049,
      "learning_rate": 5.968060988383883e-06,
      "loss": 10.3576,
      "step": 179
    },
    {
      "epoch": 0.034021641544204506,
      "grad_norm": 0.046510256826877594,
      "learning_rate": 5.418275829936537e-06,
      "loss": 10.3577,
      "step": 180
    },
    {
      "epoch": 0.03421065066389453,
      "grad_norm": 0.06163431704044342,
      "learning_rate": 4.8943483704846475e-06,
      "loss": 10.3548,
      "step": 181
    },
    {
      "epoch": 0.03439965978358456,
      "grad_norm": 0.04503452777862549,
      "learning_rate": 4.3964218465642355e-06,
      "loss": 10.3549,
      "step": 182
    },
    {
      "epoch": 0.03458866890327458,
      "grad_norm": 0.06345223635435104,
      "learning_rate": 3.924632386315186e-06,
      "loss": 10.3554,
      "step": 183
    },
    {
      "epoch": 0.03477767802296461,
      "grad_norm": 0.06070295348763466,
      "learning_rate": 3.4791089722651436e-06,
      "loss": 10.3572,
      "step": 184
    },
    {
      "epoch": 0.034966687142654634,
      "grad_norm": 0.056091003119945526,
      "learning_rate": 3.059973406066963e-06,
      "loss": 10.3572,
      "step": 185
    },
    {
      "epoch": 0.03515569626234466,
      "grad_norm": 0.06234334036707878,
      "learning_rate": 2.667340275199426e-06,
      "loss": 10.3522,
      "step": 186
    },
    {
      "epoch": 0.035344705382034686,
      "grad_norm": 0.060099318623542786,
      "learning_rate": 2.3013169216400733e-06,
      "loss": 10.3557,
      "step": 187
    },
    {
      "epoch": 0.03553371450172471,
      "grad_norm": 0.04145883023738861,
      "learning_rate": 1.9620034125190644e-06,
      "loss": 10.3583,
      "step": 188
    },
    {
      "epoch": 0.03572272362141473,
      "grad_norm": 0.06402470171451569,
      "learning_rate": 1.6494925127617634e-06,
      "loss": 10.3515,
      "step": 189
    },
    {
      "epoch": 0.035911732741104756,
      "grad_norm": 0.054933883249759674,
      "learning_rate": 1.3638696597277679e-06,
      "loss": 10.3598,
      "step": 190
    },
    {
      "epoch": 0.03610074186079478,
      "grad_norm": 0.058887969702482224,
      "learning_rate": 1.1052129398531507e-06,
      "loss": 10.3567,
      "step": 191
    },
    {
      "epoch": 0.03628975098048481,
      "grad_norm": 0.057484857738018036,
      "learning_rate": 8.735930673024806e-07,
      "loss": 10.3565,
      "step": 192
    },
    {
      "epoch": 0.03647876010017483,
      "grad_norm": 0.057735905051231384,
      "learning_rate": 6.690733646361857e-07,
      "loss": 10.357,
      "step": 193
    },
    {
      "epoch": 0.03666776921986486,
      "grad_norm": 0.05184992402791977,
      "learning_rate": 4.917097454988584e-07,
      "loss": 10.3594,
      "step": 194
    },
    {
      "epoch": 0.036856778339554884,
      "grad_norm": 0.061963826417922974,
      "learning_rate": 3.415506993330153e-07,
      "loss": 10.3577,
      "step": 195
    },
    {
      "epoch": 0.03704578745924491,
      "grad_norm": 0.06438171863555908,
      "learning_rate": 2.1863727812254653e-07,
      "loss": 10.3575,
      "step": 196
    },
    {
      "epoch": 0.037234796578934935,
      "grad_norm": 0.05486460402607918,
      "learning_rate": 1.230030851695263e-07,
      "loss": 10.3526,
      "step": 197
    },
    {
      "epoch": 0.03742380569862496,
      "grad_norm": 0.05459611490368843,
      "learning_rate": 5.467426590739511e-08,
      "loss": 10.3542,
      "step": 198
    },
    {
      "epoch": 0.03761281481831499,
      "grad_norm": 0.05004322901368141,
      "learning_rate": 1.3669500753099585e-08,
      "loss": 10.3573,
      "step": 199
    },
    {
      "epoch": 0.037801823938005005,
      "grad_norm": 0.06536795943975449,
      "learning_rate": 0.0,
      "loss": 10.3544,
      "step": 200
    },
    {
      "epoch": 0.037801823938005005,
      "eval_loss": 10.35628604888916,
      "eval_runtime": 7.8439,
      "eval_samples_per_second": 284.041,
      "eval_steps_per_second": 142.021,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5172363264000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}