|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 72740, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3747594171020073e-08, |
|
"loss": 0.0014, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.0041242782513065e-06, |
|
"loss": 0.0035, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.0008248556502613e-05, |
|
"loss": 0.0031, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.5012372834753918e-05, |
|
"loss": 0.003, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0016497113005226e-05, |
|
"loss": 0.0026, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5020621391256532e-05, |
|
"loss": 0.0029, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.0024745669507835e-05, |
|
"loss": 0.0017, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.5028869947759145e-05, |
|
"loss": 0.002, |
|
"step": 2548 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.003299422601045e-05, |
|
"loss": 0.0016, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.503711850426176e-05, |
|
"loss": 0.0016, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 8.495924816998013e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3681, |
|
"eval_samples_per_second": 21.114, |
|
"eval_steps_per_second": 0.845, |
|
"step": 3637 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.0041242782513065e-05, |
|
"loss": 0.0018, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.5045367060764364e-05, |
|
"loss": 0.0018, |
|
"step": 4004 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.004949133901567e-05, |
|
"loss": 0.0014, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.505361561726698e-05, |
|
"loss": 0.0017, |
|
"step": 4732 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.005773989551829e-05, |
|
"loss": 0.001, |
|
"step": 5096 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.506186417376959e-05, |
|
"loss": 0.0012, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.00659884520209e-05, |
|
"loss": 0.0012, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.507011273027222e-05, |
|
"loss": 0.0011, |
|
"step": 6188 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.007423700852352e-05, |
|
"loss": 0.0009, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.507836128677482e-05, |
|
"loss": 0.0015, |
|
"step": 6916 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.1638746855169302e-06, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3712, |
|
"eval_samples_per_second": 21.087, |
|
"eval_steps_per_second": 0.843, |
|
"step": 7274 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.999083493721932e-05, |
|
"loss": 0.001, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.943482112852474e-05, |
|
"loss": 0.0012, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.887880731983015e-05, |
|
"loss": 0.001, |
|
"step": 8008 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 9.832279351113555e-05, |
|
"loss": 0.0011, |
|
"step": 8372 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.776677970244097e-05, |
|
"loss": 0.001, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.721076589374639e-05, |
|
"loss": 0.001, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.665475208505179e-05, |
|
"loss": 0.0008, |
|
"step": 9464 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.60987382763572e-05, |
|
"loss": 0.001, |
|
"step": 9828 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 9.55427244676626e-05, |
|
"loss": 0.001, |
|
"step": 10192 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.498671065896801e-05, |
|
"loss": 0.0011, |
|
"step": 10556 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.2921987035952043e-06, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3627, |
|
"eval_samples_per_second": 21.162, |
|
"eval_steps_per_second": 0.846, |
|
"step": 10911 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.443069685027342e-05, |
|
"loss": 0.0009, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.387468304157884e-05, |
|
"loss": 0.0008, |
|
"step": 11284 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.331866923288425e-05, |
|
"loss": 0.001, |
|
"step": 11648 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 9.276265542418966e-05, |
|
"loss": 0.0008, |
|
"step": 12012 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 9.220664161549507e-05, |
|
"loss": 0.0006, |
|
"step": 12376 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 9.165062780680049e-05, |
|
"loss": 0.0008, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.109461399810589e-05, |
|
"loss": 0.0008, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 9.05386001894113e-05, |
|
"loss": 0.0009, |
|
"step": 13468 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 8.998258638071672e-05, |
|
"loss": 0.0007, |
|
"step": 13832 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 8.942657257202212e-05, |
|
"loss": 0.001, |
|
"step": 14196 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.308262310179998e-06, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.356, |
|
"eval_samples_per_second": 21.223, |
|
"eval_steps_per_second": 0.849, |
|
"step": 14548 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.887055876332754e-05, |
|
"loss": 0.0009, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 8.831454495463294e-05, |
|
"loss": 0.0008, |
|
"step": 14924 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 8.775853114593834e-05, |
|
"loss": 0.0006, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 8.720251733724376e-05, |
|
"loss": 0.0005, |
|
"step": 15652 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 8.664650352854918e-05, |
|
"loss": 0.0007, |
|
"step": 16016 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 8.609048971985458e-05, |
|
"loss": 0.0007, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.553447591115999e-05, |
|
"loss": 0.0008, |
|
"step": 16744 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 8.497846210246541e-05, |
|
"loss": 0.0006, |
|
"step": 17108 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 8.442244829377081e-05, |
|
"loss": 0.0007, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 8.386643448507623e-05, |
|
"loss": 0.0007, |
|
"step": 17836 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 5.198210146772908e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3541, |
|
"eval_samples_per_second": 21.24, |
|
"eval_steps_per_second": 0.85, |
|
"step": 18185 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 8.331042067638164e-05, |
|
"loss": 0.0007, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 8.275440686768706e-05, |
|
"loss": 0.0006, |
|
"step": 18564 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 8.219839305899246e-05, |
|
"loss": 0.0007, |
|
"step": 18928 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 8.164237925029788e-05, |
|
"loss": 0.0008, |
|
"step": 19292 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 8.108636544160328e-05, |
|
"loss": 0.0007, |
|
"step": 19656 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 8.053035163290868e-05, |
|
"loss": 0.0006, |
|
"step": 20020 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 7.99743378242141e-05, |
|
"loss": 0.0006, |
|
"step": 20384 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 7.941832401551951e-05, |
|
"loss": 0.0007, |
|
"step": 20748 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 7.886231020682491e-05, |
|
"loss": 0.0006, |
|
"step": 21112 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 7.830629639813033e-05, |
|
"loss": 0.0008, |
|
"step": 21476 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 5.296922722664021e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3254, |
|
"eval_samples_per_second": 21.501, |
|
"eval_steps_per_second": 0.86, |
|
"step": 21822 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7.775028258943574e-05, |
|
"loss": 0.0007, |
|
"step": 21840 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 7.719426878074115e-05, |
|
"loss": 0.0005, |
|
"step": 22204 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 7.663825497204656e-05, |
|
"loss": 0.0006, |
|
"step": 22568 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 7.608224116335198e-05, |
|
"loss": 0.0006, |
|
"step": 22932 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 7.552622735465738e-05, |
|
"loss": 0.0006, |
|
"step": 23296 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 7.49702135459628e-05, |
|
"loss": 0.0006, |
|
"step": 23660 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 7.441419973726821e-05, |
|
"loss": 0.0006, |
|
"step": 24024 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 7.385818592857361e-05, |
|
"loss": 0.0008, |
|
"step": 24388 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 7.330217211987902e-05, |
|
"loss": 0.0005, |
|
"step": 24752 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 7.274615831118443e-05, |
|
"loss": 0.0006, |
|
"step": 25116 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 8.367381951757125e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3096, |
|
"eval_samples_per_second": 21.649, |
|
"eval_steps_per_second": 0.866, |
|
"step": 25459 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 7.219014450248985e-05, |
|
"loss": 0.0006, |
|
"step": 25480 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 7.163413069379525e-05, |
|
"loss": 0.0007, |
|
"step": 25844 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 7.107811688510066e-05, |
|
"loss": 0.0005, |
|
"step": 26208 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 7.052210307640608e-05, |
|
"loss": 0.0005, |
|
"step": 26572 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 6.996608926771148e-05, |
|
"loss": 0.0006, |
|
"step": 26936 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 6.94100754590169e-05, |
|
"loss": 0.0004, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 6.885406165032231e-05, |
|
"loss": 0.0006, |
|
"step": 27664 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 6.829804784162772e-05, |
|
"loss": 0.0005, |
|
"step": 28028 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 6.774203403293313e-05, |
|
"loss": 0.0006, |
|
"step": 28392 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 6.718602022423855e-05, |
|
"loss": 0.0007, |
|
"step": 28756 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 7.714121466051438e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3368, |
|
"eval_samples_per_second": 21.397, |
|
"eval_steps_per_second": 0.856, |
|
"step": 29096 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 6.663000641554395e-05, |
|
"loss": 0.0006, |
|
"step": 29120 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 6.607399260684935e-05, |
|
"loss": 0.0004, |
|
"step": 29484 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 6.551797879815477e-05, |
|
"loss": 0.0006, |
|
"step": 29848 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 6.496196498946018e-05, |
|
"loss": 0.0005, |
|
"step": 30212 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 6.440595118076558e-05, |
|
"loss": 0.0006, |
|
"step": 30576 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 6.3849937372071e-05, |
|
"loss": 0.0005, |
|
"step": 30940 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 6.329392356337642e-05, |
|
"loss": 0.0004, |
|
"step": 31304 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 6.273790975468182e-05, |
|
"loss": 0.0005, |
|
"step": 31668 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 6.218189594598723e-05, |
|
"loss": 0.0005, |
|
"step": 32032 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 6.162588213729265e-05, |
|
"loss": 0.0006, |
|
"step": 32396 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 7.54007260184153e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3672, |
|
"eval_samples_per_second": 21.122, |
|
"eval_steps_per_second": 0.845, |
|
"step": 32733 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 6.106986832859805e-05, |
|
"loss": 0.0005, |
|
"step": 32760 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 6.051385451990347e-05, |
|
"loss": 0.0006, |
|
"step": 33124 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 5.9957840711208876e-05, |
|
"loss": 0.0005, |
|
"step": 33488 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 5.940182690251429e-05, |
|
"loss": 0.0005, |
|
"step": 33852 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 5.884581309381969e-05, |
|
"loss": 0.0005, |
|
"step": 34216 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 5.82897992851251e-05, |
|
"loss": 0.0005, |
|
"step": 34580 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 5.773378547643051e-05, |
|
"loss": 0.0004, |
|
"step": 34944 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 5.717777166773593e-05, |
|
"loss": 0.0004, |
|
"step": 35308 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 5.6621757859041336e-05, |
|
"loss": 0.0005, |
|
"step": 35672 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 5.6065744050346745e-05, |
|
"loss": 0.0004, |
|
"step": 36036 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.4673248642793624e-06, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3769, |
|
"eval_samples_per_second": 21.036, |
|
"eval_steps_per_second": 0.841, |
|
"step": 36370 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 5.550973024165216e-05, |
|
"loss": 0.0005, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 5.495371643295757e-05, |
|
"loss": 0.0005, |
|
"step": 36764 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 5.439770262426298e-05, |
|
"loss": 0.0005, |
|
"step": 37128 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 5.3841688815568394e-05, |
|
"loss": 0.0004, |
|
"step": 37492 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 5.32856750068738e-05, |
|
"loss": 0.0005, |
|
"step": 37856 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 5.272966119817921e-05, |
|
"loss": 0.0004, |
|
"step": 38220 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 5.217364738948463e-05, |
|
"loss": 0.0005, |
|
"step": 38584 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 5.161763358079002e-05, |
|
"loss": 0.0005, |
|
"step": 38948 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 5.106161977209544e-05, |
|
"loss": 0.0004, |
|
"step": 39312 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"learning_rate": 5.050560596340085e-05, |
|
"loss": 0.0005, |
|
"step": 39676 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 8.162444373738253e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3085, |
|
"eval_samples_per_second": 21.66, |
|
"eval_steps_per_second": 0.866, |
|
"step": 40007 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 4.9949592154706257e-05, |
|
"loss": 0.0005, |
|
"step": 40040 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 4.939357834601167e-05, |
|
"loss": 0.0004, |
|
"step": 40404 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 4.883756453731708e-05, |
|
"loss": 0.0004, |
|
"step": 40768 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 4.828155072862249e-05, |
|
"loss": 0.0004, |
|
"step": 41132 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 4.7725536919927906e-05, |
|
"loss": 0.0005, |
|
"step": 41496 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"learning_rate": 4.7169523111233314e-05, |
|
"loss": 0.0005, |
|
"step": 41860 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 4.661350930253873e-05, |
|
"loss": 0.0005, |
|
"step": 42224 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 4.605749549384413e-05, |
|
"loss": 0.0004, |
|
"step": 42588 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 4.550148168514954e-05, |
|
"loss": 0.0004, |
|
"step": 42952 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 4.494546787645496e-05, |
|
"loss": 0.0004, |
|
"step": 43316 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.4554426570612122e-06, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.31, |
|
"eval_samples_per_second": 21.645, |
|
"eval_steps_per_second": 0.866, |
|
"step": 43644 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.4389454067760366e-05, |
|
"loss": 0.0003, |
|
"step": 43680 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 4.3833440259065775e-05, |
|
"loss": 0.0004, |
|
"step": 44044 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 4.327742645037119e-05, |
|
"loss": 0.0003, |
|
"step": 44408 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 4.27214126416766e-05, |
|
"loss": 0.0004, |
|
"step": 44772 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 4.216539883298201e-05, |
|
"loss": 0.0004, |
|
"step": 45136 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"learning_rate": 4.160938502428742e-05, |
|
"loss": 0.0004, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 4.1053371215592826e-05, |
|
"loss": 0.0004, |
|
"step": 45864 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 4.049735740689824e-05, |
|
"loss": 0.0005, |
|
"step": 46228 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 3.994134359820365e-05, |
|
"loss": 0.0005, |
|
"step": 46592 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 3.938532978950906e-05, |
|
"loss": 0.0004, |
|
"step": 46956 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 8.021904704946792e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3634, |
|
"eval_samples_per_second": 21.156, |
|
"eval_steps_per_second": 0.846, |
|
"step": 47281 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 3.882931598081447e-05, |
|
"loss": 0.0003, |
|
"step": 47320 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 3.827330217211988e-05, |
|
"loss": 0.0004, |
|
"step": 47684 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 3.771728836342529e-05, |
|
"loss": 0.0004, |
|
"step": 48048 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 3.71612745547307e-05, |
|
"loss": 0.0004, |
|
"step": 48412 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 3.660526074603611e-05, |
|
"loss": 0.0004, |
|
"step": 48776 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 3.6049246937341526e-05, |
|
"loss": 0.0004, |
|
"step": 49140 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 3.5493233128646935e-05, |
|
"loss": 0.0004, |
|
"step": 49504 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 3.4937219319952344e-05, |
|
"loss": 0.0004, |
|
"step": 49868 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 3.438120551125775e-05, |
|
"loss": 0.0004, |
|
"step": 50232 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 3.382519170256316e-05, |
|
"loss": 0.0005, |
|
"step": 50596 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 6.187271992530441e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.322, |
|
"eval_samples_per_second": 21.533, |
|
"eval_steps_per_second": 0.861, |
|
"step": 50918 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 3.326917789386858e-05, |
|
"loss": 0.0005, |
|
"step": 50960 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 3.2713164085173986e-05, |
|
"loss": 0.0005, |
|
"step": 51324 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 3.2157150276479395e-05, |
|
"loss": 0.0003, |
|
"step": 51688 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 3.1601136467784804e-05, |
|
"loss": 0.0004, |
|
"step": 52052 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 3.104512265909021e-05, |
|
"loss": 0.0004, |
|
"step": 52416 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 3.0489108850395625e-05, |
|
"loss": 0.0004, |
|
"step": 52780 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 2.9933095041701037e-05, |
|
"loss": 0.0004, |
|
"step": 53144 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 2.937708123300645e-05, |
|
"loss": 0.0004, |
|
"step": 53508 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 2.882106742431186e-05, |
|
"loss": 0.0004, |
|
"step": 53872 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 2.826505361561727e-05, |
|
"loss": 0.0004, |
|
"step": 54236 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 4.629501972885919e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3761, |
|
"eval_samples_per_second": 21.043, |
|
"eval_steps_per_second": 0.842, |
|
"step": 54555 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 2.7709039806922676e-05, |
|
"loss": 0.0005, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 2.715302599822809e-05, |
|
"loss": 0.0003, |
|
"step": 54964 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"learning_rate": 2.6597012189533497e-05, |
|
"loss": 0.0003, |
|
"step": 55328 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 2.604099838083891e-05, |
|
"loss": 0.0003, |
|
"step": 55692 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 2.5484984572144322e-05, |
|
"loss": 0.0003, |
|
"step": 56056 |
|
}, |
|
{ |
|
"epoch": 15.51, |
|
"learning_rate": 2.492897076344973e-05, |
|
"loss": 0.0003, |
|
"step": 56420 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 2.437295695475514e-05, |
|
"loss": 0.0004, |
|
"step": 56784 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 2.3816943146060552e-05, |
|
"loss": 0.0003, |
|
"step": 57148 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 2.326092933736596e-05, |
|
"loss": 0.0004, |
|
"step": 57512 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 2.2704915528671373e-05, |
|
"loss": 0.0004, |
|
"step": 57876 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 4.1432366515437025e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3527, |
|
"eval_samples_per_second": 21.253, |
|
"eval_steps_per_second": 0.85, |
|
"step": 58192 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 2.2148901719976782e-05, |
|
"loss": 0.0004, |
|
"step": 58240 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 2.1592887911282194e-05, |
|
"loss": 0.0004, |
|
"step": 58604 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 2.1036874102587603e-05, |
|
"loss": 0.0004, |
|
"step": 58968 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 2.0480860293893016e-05, |
|
"loss": 0.0004, |
|
"step": 59332 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 1.9924846485198424e-05, |
|
"loss": 0.0003, |
|
"step": 59696 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 1.9368832676503833e-05, |
|
"loss": 0.0004, |
|
"step": 60060 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"learning_rate": 1.8812818867809246e-05, |
|
"loss": 0.0003, |
|
"step": 60424 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 1.8256805059114658e-05, |
|
"loss": 0.0004, |
|
"step": 60788 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 1.7700791250420067e-05, |
|
"loss": 0.0005, |
|
"step": 61152 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 1.7144777441725476e-05, |
|
"loss": 0.0004, |
|
"step": 61516 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 6.78707863244199e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3415, |
|
"eval_samples_per_second": 21.354, |
|
"eval_steps_per_second": 0.854, |
|
"step": 61829 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 1.6588763633030888e-05, |
|
"loss": 0.0004, |
|
"step": 61880 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 1.6032749824336297e-05, |
|
"loss": 0.0003, |
|
"step": 62244 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 1.547673601564171e-05, |
|
"loss": 0.0003, |
|
"step": 62608 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 1.4920722206947118e-05, |
|
"loss": 0.0003, |
|
"step": 62972 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 1.436470839825253e-05, |
|
"loss": 0.0003, |
|
"step": 63336 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"learning_rate": 1.3808694589557939e-05, |
|
"loss": 0.0004, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 1.325268078086335e-05, |
|
"loss": 0.0004, |
|
"step": 64064 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 1.269666697216876e-05, |
|
"loss": 0.0004, |
|
"step": 64428 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"learning_rate": 1.2140653163474171e-05, |
|
"loss": 0.0003, |
|
"step": 64792 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"learning_rate": 1.1584639354779581e-05, |
|
"loss": 0.0004, |
|
"step": 65156 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 4.647758657938539e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3851, |
|
"eval_samples_per_second": 20.964, |
|
"eval_steps_per_second": 0.839, |
|
"step": 65466 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 1.1028625546084992e-05, |
|
"loss": 0.0003, |
|
"step": 65520 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"learning_rate": 1.0472611737390403e-05, |
|
"loss": 0.0003, |
|
"step": 65884 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 9.916597928695811e-06, |
|
"loss": 0.0005, |
|
"step": 66248 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"learning_rate": 9.360584120001222e-06, |
|
"loss": 0.0003, |
|
"step": 66612 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"learning_rate": 8.804570311306633e-06, |
|
"loss": 0.0003, |
|
"step": 66976 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"learning_rate": 8.248556502612043e-06, |
|
"loss": 0.0004, |
|
"step": 67340 |
|
}, |
|
{ |
|
"epoch": 18.62, |
|
"learning_rate": 7.692542693917454e-06, |
|
"loss": 0.0004, |
|
"step": 67704 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"learning_rate": 7.1365288852228635e-06, |
|
"loss": 0.0004, |
|
"step": 68068 |
|
}, |
|
{ |
|
"epoch": 18.82, |
|
"learning_rate": 6.580515076528275e-06, |
|
"loss": 0.0004, |
|
"step": 68432 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"learning_rate": 6.024501267833685e-06, |
|
"loss": 0.0004, |
|
"step": 68796 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 6.43872112959798e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3968, |
|
"eval_samples_per_second": 20.861, |
|
"eval_steps_per_second": 0.834, |
|
"step": 69103 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 5.468487459139095e-06, |
|
"loss": 0.0003, |
|
"step": 69160 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"learning_rate": 4.912473650444506e-06, |
|
"loss": 0.0003, |
|
"step": 69524 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"learning_rate": 4.3564598417499164e-06, |
|
"loss": 0.0004, |
|
"step": 69888 |
|
}, |
|
{ |
|
"epoch": 19.32, |
|
"learning_rate": 3.8004460330553266e-06, |
|
"loss": 0.0003, |
|
"step": 70252 |
|
}, |
|
{ |
|
"epoch": 19.42, |
|
"learning_rate": 3.244432224360737e-06, |
|
"loss": 0.0003, |
|
"step": 70616 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 2.6884184156661473e-06, |
|
"loss": 0.0004, |
|
"step": 70980 |
|
}, |
|
{ |
|
"epoch": 19.62, |
|
"learning_rate": 2.132404606971558e-06, |
|
"loss": 0.0003, |
|
"step": 71344 |
|
}, |
|
{ |
|
"epoch": 19.72, |
|
"learning_rate": 1.5763907982769683e-06, |
|
"loss": 0.0003, |
|
"step": 71708 |
|
}, |
|
{ |
|
"epoch": 19.82, |
|
"learning_rate": 1.0203769895823786e-06, |
|
"loss": 0.0004, |
|
"step": 72072 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"learning_rate": 4.6436318088778905e-07, |
|
"loss": 0.0004, |
|
"step": 72436 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 7.036084070932702e-07, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3249, |
|
"eval_samples_per_second": 21.506, |
|
"eval_steps_per_second": 0.86, |
|
"step": 72740 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 72740, |
|
"total_flos": 4.361497895702938e+16, |
|
"train_loss": 0.0006492722850091394, |
|
"train_runtime": 7118.3904, |
|
"train_samples_per_second": 306.491, |
|
"train_steps_per_second": 10.219 |
|
} |
|
], |
|
"logging_steps": 364, |
|
"max_steps": 72740, |
|
"num_train_epochs": 20, |
|
"save_steps": 728, |
|
"total_flos": 4.361497895702938e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|