|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 114970, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.697921196833957e-09, |
|
"loss": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.001304688179525e-06, |
|
"loss": 0.0024, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.000260937635905e-05, |
|
"loss": 0.0025, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.5003914064538576e-05, |
|
"loss": 0.0021, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.00052187527181e-05, |
|
"loss": 0.002, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.500652344089763e-05, |
|
"loss": 0.0024, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.0007828129077153e-05, |
|
"loss": 0.0021, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.500913281725668e-05, |
|
"loss": 0.0024, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.00104375054362e-05, |
|
"loss": 0.0021, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.501174219361573e-05, |
|
"loss": 0.0021, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5.001304688179526e-05, |
|
"loss": 0.0021, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5.501435156997478e-05, |
|
"loss": 0.0022, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.0015656258154306e-05, |
|
"loss": 0.002, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.501696094633383e-05, |
|
"loss": 0.0021, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.001826563451336e-05, |
|
"loss": 0.0018, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.501957032269288e-05, |
|
"loss": 0.0021, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.00208750108724e-05, |
|
"loss": 0.0019, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.502217969905193e-05, |
|
"loss": 0.0023, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.002348438723146e-05, |
|
"loss": 0.0021, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.502478907541099e-05, |
|
"loss": 0.0021, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.99971006929344e-05, |
|
"loss": 0.0022, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.944140017202556e-05, |
|
"loss": 0.0025, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.888569965111673e-05, |
|
"loss": 0.0022, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.832999913020789e-05, |
|
"loss": 0.0022, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.777429860929905e-05, |
|
"loss": 0.002, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.721859808839022e-05, |
|
"loss": 0.0021, |
|
"step": 14375 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.666289756748138e-05, |
|
"loss": 0.0023, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.610719704657253e-05, |
|
"loss": 0.0023, |
|
"step": 15525 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.55514965256637e-05, |
|
"loss": 0.0023, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.499579600475486e-05, |
|
"loss": 0.002, |
|
"step": 16675 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.444009548384603e-05, |
|
"loss": 0.0025, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.388439496293719e-05, |
|
"loss": 0.0023, |
|
"step": 17825 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.332869444202836e-05, |
|
"loss": 0.0026, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.277299392111952e-05, |
|
"loss": 0.0025, |
|
"step": 18975 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.221729340021069e-05, |
|
"loss": 0.0027, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.166159287930185e-05, |
|
"loss": 0.0024, |
|
"step": 20125 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.110589235839302e-05, |
|
"loss": 0.0022, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.055019183748418e-05, |
|
"loss": 0.0026, |
|
"step": 21275 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.999449131657535e-05, |
|
"loss": 0.0025, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.94387907956665e-05, |
|
"loss": 0.0024, |
|
"step": 22425 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.02637363225221634, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 23.0018, |
|
"eval_samples_per_second": 11.217, |
|
"eval_steps_per_second": 0.783, |
|
"step": 22994 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.888309027475768e-05, |
|
"loss": 0.0026, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 8.832738975384883e-05, |
|
"loss": 0.0019, |
|
"step": 23575 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.777168923294e-05, |
|
"loss": 0.002, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 8.721598871203116e-05, |
|
"loss": 0.002, |
|
"step": 24725 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.666028819112233e-05, |
|
"loss": 0.0021, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.610458767021349e-05, |
|
"loss": 0.0022, |
|
"step": 25875 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.554888714930466e-05, |
|
"loss": 0.0022, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.499318662839582e-05, |
|
"loss": 0.0023, |
|
"step": 27025 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.443748610748699e-05, |
|
"loss": 0.0023, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 8.388178558657815e-05, |
|
"loss": 0.002, |
|
"step": 28175 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.33260850656693e-05, |
|
"loss": 0.0022, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.277038454476048e-05, |
|
"loss": 0.0022, |
|
"step": 29325 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.221468402385163e-05, |
|
"loss": 0.002, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 8.165898350294279e-05, |
|
"loss": 0.0023, |
|
"step": 30475 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.110328298203396e-05, |
|
"loss": 0.0026, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 8.054758246112512e-05, |
|
"loss": 0.0022, |
|
"step": 31625 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.999188194021629e-05, |
|
"loss": 0.0023, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 7.943618141930745e-05, |
|
"loss": 0.0023, |
|
"step": 32775 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 7.888048089839862e-05, |
|
"loss": 0.0024, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.832478037748978e-05, |
|
"loss": 0.0024, |
|
"step": 33925 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.776907985658095e-05, |
|
"loss": 0.0025, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.721337933567211e-05, |
|
"loss": 0.0023, |
|
"step": 35075 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.665767881476328e-05, |
|
"loss": 0.0021, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 7.610197829385444e-05, |
|
"loss": 0.0022, |
|
"step": 36225 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 7.554627777294561e-05, |
|
"loss": 0.002, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 7.499057725203676e-05, |
|
"loss": 0.0021, |
|
"step": 37375 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 7.443487673112794e-05, |
|
"loss": 0.0024, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 7.38791762102191e-05, |
|
"loss": 0.0024, |
|
"step": 38525 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 7.332347568931026e-05, |
|
"loss": 0.0022, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.276777516840142e-05, |
|
"loss": 0.0024, |
|
"step": 39675 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.221207464749259e-05, |
|
"loss": 0.0023, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 7.165637412658375e-05, |
|
"loss": 0.0023, |
|
"step": 40825 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.110067360567492e-05, |
|
"loss": 0.0022, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.054497308476608e-05, |
|
"loss": 0.0023, |
|
"step": 41975 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.998927256385725e-05, |
|
"loss": 0.0023, |
|
"step": 42550 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.943357204294841e-05, |
|
"loss": 0.0023, |
|
"step": 43125 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 6.887787152203957e-05, |
|
"loss": 0.0024, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.832217100113074e-05, |
|
"loss": 0.0024, |
|
"step": 44275 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 6.77664704802219e-05, |
|
"loss": 0.0022, |
|
"step": 44850 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.721076995931305e-05, |
|
"loss": 0.0022, |
|
"step": 45425 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.025934860110282898, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 21.6745, |
|
"eval_samples_per_second": 11.903, |
|
"eval_steps_per_second": 0.83, |
|
"step": 45988 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.665506943840422e-05, |
|
"loss": 0.0024, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 6.609936891749538e-05, |
|
"loss": 0.0019, |
|
"step": 46575 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.554366839658655e-05, |
|
"loss": 0.0021, |
|
"step": 47150 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.498796787567771e-05, |
|
"loss": 0.0019, |
|
"step": 47725 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 6.443226735476888e-05, |
|
"loss": 0.0021, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 6.387656683386004e-05, |
|
"loss": 0.0018, |
|
"step": 48875 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 6.332086631295121e-05, |
|
"loss": 0.002, |
|
"step": 49450 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 6.276516579204237e-05, |
|
"loss": 0.0021, |
|
"step": 50025 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 6.220946527113354e-05, |
|
"loss": 0.002, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 6.16537647502247e-05, |
|
"loss": 0.0019, |
|
"step": 51175 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 6.109806422931587e-05, |
|
"loss": 0.002, |
|
"step": 51750 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 6.0542363708407024e-05, |
|
"loss": 0.0021, |
|
"step": 52325 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 5.9986663187498195e-05, |
|
"loss": 0.0021, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 5.943096266658935e-05, |
|
"loss": 0.0019, |
|
"step": 53475 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 5.8875262145680524e-05, |
|
"loss": 0.0018, |
|
"step": 54050 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.831956162477168e-05, |
|
"loss": 0.0021, |
|
"step": 54625 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.7763861103862846e-05, |
|
"loss": 0.0021, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.720816058295401e-05, |
|
"loss": 0.0019, |
|
"step": 55775 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.6652460062045174e-05, |
|
"loss": 0.002, |
|
"step": 56350 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5.609675954113633e-05, |
|
"loss": 0.0022, |
|
"step": 56925 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5.55410590202275e-05, |
|
"loss": 0.0018, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 5.498535849931866e-05, |
|
"loss": 0.0023, |
|
"step": 58075 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 5.442965797840983e-05, |
|
"loss": 0.0021, |
|
"step": 58650 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 5.387395745750099e-05, |
|
"loss": 0.002, |
|
"step": 59225 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 5.331825693659216e-05, |
|
"loss": 0.0019, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 5.276255641568332e-05, |
|
"loss": 0.0021, |
|
"step": 60375 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.220685589477449e-05, |
|
"loss": 0.0022, |
|
"step": 60950 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.1651155373865647e-05, |
|
"loss": 0.0017, |
|
"step": 61525 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.109545485295682e-05, |
|
"loss": 0.0019, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.0539754332047975e-05, |
|
"loss": 0.002, |
|
"step": 62675 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.998405381113914e-05, |
|
"loss": 0.0019, |
|
"step": 63250 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.9428353290230304e-05, |
|
"loss": 0.0022, |
|
"step": 63825 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.887265276932147e-05, |
|
"loss": 0.0021, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.831695224841263e-05, |
|
"loss": 0.0023, |
|
"step": 64975 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.77612517275038e-05, |
|
"loss": 0.002, |
|
"step": 65550 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.720555120659496e-05, |
|
"loss": 0.002, |
|
"step": 66125 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 4.6649850685686126e-05, |
|
"loss": 0.0021, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.609415016477729e-05, |
|
"loss": 0.0021, |
|
"step": 67275 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.5538449643868454e-05, |
|
"loss": 0.002, |
|
"step": 67850 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.498274912295962e-05, |
|
"loss": 0.0019, |
|
"step": 68425 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.029171258211135864, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 20.5415, |
|
"eval_samples_per_second": 12.56, |
|
"eval_steps_per_second": 0.876, |
|
"step": 68982 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.442704860205078e-05, |
|
"loss": 0.0021, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.387134808114195e-05, |
|
"loss": 0.0017, |
|
"step": 69575 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 4.3315647560233105e-05, |
|
"loss": 0.002, |
|
"step": 70150 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 4.275994703932427e-05, |
|
"loss": 0.0016, |
|
"step": 70725 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 4.2204246518415434e-05, |
|
"loss": 0.0018, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 4.16485459975066e-05, |
|
"loss": 0.0018, |
|
"step": 71875 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.109284547659776e-05, |
|
"loss": 0.0018, |
|
"step": 72450 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 4.0537144955688927e-05, |
|
"loss": 0.0018, |
|
"step": 73025 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.998144443478009e-05, |
|
"loss": 0.002, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.9425743913871255e-05, |
|
"loss": 0.002, |
|
"step": 74175 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.887004339296242e-05, |
|
"loss": 0.0019, |
|
"step": 74750 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.8314342872053584e-05, |
|
"loss": 0.0018, |
|
"step": 75325 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.775864235114475e-05, |
|
"loss": 0.0018, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.720294183023591e-05, |
|
"loss": 0.0017, |
|
"step": 76475 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.664724130932708e-05, |
|
"loss": 0.0017, |
|
"step": 77050 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.6091540788418234e-05, |
|
"loss": 0.0018, |
|
"step": 77625 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.55358402675094e-05, |
|
"loss": 0.0018, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.498013974660056e-05, |
|
"loss": 0.0016, |
|
"step": 78775 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.442443922569173e-05, |
|
"loss": 0.0016, |
|
"step": 79350 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.386873870478289e-05, |
|
"loss": 0.0018, |
|
"step": 79925 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.3313038183874056e-05, |
|
"loss": 0.0017, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 3.275733766296522e-05, |
|
"loss": 0.0017, |
|
"step": 81075 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.2201637142056385e-05, |
|
"loss": 0.0016, |
|
"step": 81650 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 3.164593662114755e-05, |
|
"loss": 0.002, |
|
"step": 82225 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 3.1090236100238714e-05, |
|
"loss": 0.0018, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 3.053453557932988e-05, |
|
"loss": 0.0016, |
|
"step": 83375 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.997883505842104e-05, |
|
"loss": 0.0017, |
|
"step": 83950 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 2.9423134537512203e-05, |
|
"loss": 0.0019, |
|
"step": 84525 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.8867434016603368e-05, |
|
"loss": 0.0018, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.8311733495694532e-05, |
|
"loss": 0.0017, |
|
"step": 85675 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.7756032974785696e-05, |
|
"loss": 0.0017, |
|
"step": 86250 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.720033245387686e-05, |
|
"loss": 0.0019, |
|
"step": 86825 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.664463193296802e-05, |
|
"loss": 0.002, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.6088931412059186e-05, |
|
"loss": 0.0019, |
|
"step": 87975 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 2.553323089115035e-05, |
|
"loss": 0.0017, |
|
"step": 88550 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 2.4977530370241514e-05, |
|
"loss": 0.0018, |
|
"step": 89125 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 2.442182984933268e-05, |
|
"loss": 0.0018, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 2.3866129328423843e-05, |
|
"loss": 0.0018, |
|
"step": 90275 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.3310428807515004e-05, |
|
"loss": 0.0019, |
|
"step": 90850 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 2.275472828660617e-05, |
|
"loss": 0.0016, |
|
"step": 91425 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.02807791158556938, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 20.9058, |
|
"eval_samples_per_second": 12.341, |
|
"eval_steps_per_second": 0.861, |
|
"step": 91976 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.2199027765697333e-05, |
|
"loss": 0.0019, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 2.1643327244788497e-05, |
|
"loss": 0.0015, |
|
"step": 92575 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 2.108762672387966e-05, |
|
"loss": 0.0016, |
|
"step": 93150 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.0531926202970826e-05, |
|
"loss": 0.0015, |
|
"step": 93725 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 1.997622568206199e-05, |
|
"loss": 0.0017, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.942052516115315e-05, |
|
"loss": 0.0016, |
|
"step": 94875 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.8864824640244315e-05, |
|
"loss": 0.0017, |
|
"step": 95450 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 1.830912411933548e-05, |
|
"loss": 0.0017, |
|
"step": 96025 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.7753423598426644e-05, |
|
"loss": 0.0015, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 1.719772307751781e-05, |
|
"loss": 0.0018, |
|
"step": 97175 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1.6642022556608973e-05, |
|
"loss": 0.0018, |
|
"step": 97750 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.6086322035700134e-05, |
|
"loss": 0.0014, |
|
"step": 98325 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 1.5530621514791298e-05, |
|
"loss": 0.0016, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 1.4974920993882462e-05, |
|
"loss": 0.0018, |
|
"step": 99475 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1.4419220472973627e-05, |
|
"loss": 0.0015, |
|
"step": 100050 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.3863519952064791e-05, |
|
"loss": 0.0019, |
|
"step": 100625 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.3307819431155954e-05, |
|
"loss": 0.0016, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.2752118910247118e-05, |
|
"loss": 0.0015, |
|
"step": 101775 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.2196418389338282e-05, |
|
"loss": 0.0017, |
|
"step": 102350 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.1640717868429447e-05, |
|
"loss": 0.0016, |
|
"step": 102925 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.108501734752061e-05, |
|
"loss": 0.0015, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.0529316826611774e-05, |
|
"loss": 0.0017, |
|
"step": 104075 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 9.973616305702938e-06, |
|
"loss": 0.0016, |
|
"step": 104650 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 9.4179157847941e-06, |
|
"loss": 0.0016, |
|
"step": 105225 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.862215263885265e-06, |
|
"loss": 0.0017, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 8.30651474297643e-06, |
|
"loss": 0.0015, |
|
"step": 106375 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 7.750814222067592e-06, |
|
"loss": 0.0014, |
|
"step": 106950 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 7.195113701158756e-06, |
|
"loss": 0.0017, |
|
"step": 107525 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 6.639413180249921e-06, |
|
"loss": 0.0017, |
|
"step": 108100 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 6.083712659341084e-06, |
|
"loss": 0.0016, |
|
"step": 108675 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 5.528012138432248e-06, |
|
"loss": 0.0017, |
|
"step": 109250 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 4.972311617523412e-06, |
|
"loss": 0.0018, |
|
"step": 109825 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 4.4166110966145756e-06, |
|
"loss": 0.0015, |
|
"step": 110400 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 3.86091057570574e-06, |
|
"loss": 0.0017, |
|
"step": 110975 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 3.3052100547969034e-06, |
|
"loss": 0.0014, |
|
"step": 111550 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.7495095338880677e-06, |
|
"loss": 0.0018, |
|
"step": 112125 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 2.1938090129792312e-06, |
|
"loss": 0.0017, |
|
"step": 112700 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.6381084920703951e-06, |
|
"loss": 0.0018, |
|
"step": 113275 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 1.082407971161559e-06, |
|
"loss": 0.0015, |
|
"step": 113850 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 5.26707450252723e-07, |
|
"loss": 0.0016, |
|
"step": 114425 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.027882983908057213, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 20.563, |
|
"eval_samples_per_second": 12.547, |
|
"eval_steps_per_second": 0.875, |
|
"step": 114970 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 114970, |
|
"total_flos": 2.9139726999711744e+16, |
|
"train_loss": 0.0019824859863435676, |
|
"train_runtime": 8494.2801, |
|
"train_samples_per_second": 203.021, |
|
"train_steps_per_second": 13.535 |
|
} |
|
], |
|
"logging_steps": 575, |
|
"max_steps": 114970, |
|
"num_train_epochs": 5, |
|
"save_steps": 1150, |
|
"total_flos": 2.9139726999711744e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|