{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 72740, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3747594171020073e-08, "loss": 0.0014, "step": 1 }, { "epoch": 0.1, "learning_rate": 5.0041242782513065e-06, "loss": 0.0035, "step": 364 }, { "epoch": 0.2, "learning_rate": 1.0008248556502613e-05, "loss": 0.0031, "step": 728 }, { "epoch": 0.3, "learning_rate": 1.5012372834753918e-05, "loss": 0.003, "step": 1092 }, { "epoch": 0.4, "learning_rate": 2.0016497113005226e-05, "loss": 0.0026, "step": 1456 }, { "epoch": 0.5, "learning_rate": 2.5020621391256532e-05, "loss": 0.0029, "step": 1820 }, { "epoch": 0.6, "learning_rate": 3.0024745669507835e-05, "loss": 0.0017, "step": 2184 }, { "epoch": 0.7, "learning_rate": 3.5028869947759145e-05, "loss": 0.002, "step": 2548 }, { "epoch": 0.8, "learning_rate": 4.003299422601045e-05, "loss": 0.0016, "step": 2912 }, { "epoch": 0.9, "learning_rate": 4.503711850426176e-05, "loss": 0.0016, "step": 3276 }, { "epoch": 1.0, "eval_loss": 8.495924816998013e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3681, "eval_samples_per_second": 21.114, "eval_steps_per_second": 0.845, "step": 3637 }, { "epoch": 1.0, "learning_rate": 5.0041242782513065e-05, "loss": 0.0018, "step": 3640 }, { "epoch": 1.1, "learning_rate": 5.5045367060764364e-05, "loss": 0.0018, "step": 4004 }, { "epoch": 1.2, "learning_rate": 6.004949133901567e-05, "loss": 0.0014, "step": 4368 }, { "epoch": 1.3, "learning_rate": 6.505361561726698e-05, "loss": 0.0017, "step": 4732 }, { "epoch": 1.4, "learning_rate": 7.005773989551829e-05, "loss": 0.001, "step": 5096 }, { "epoch": 1.5, "learning_rate": 7.506186417376959e-05, "loss": 0.0012, "step": 5460 }, { "epoch": 1.6, "learning_rate": 8.00659884520209e-05, "loss": 0.0012, "step": 5824 }, { "epoch": 1.7, "learning_rate": 8.507011273027222e-05, "loss": 0.0011, "step": 6188 }, { "epoch": 1.8, "learning_rate": 9.007423700852352e-05, "loss": 0.0009, "step": 6552 }, { "epoch": 1.9, "learning_rate": 9.507836128677482e-05, "loss": 0.0015, "step": 6916 }, { "epoch": 2.0, "eval_loss": 1.1638746855169302e-06, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3712, "eval_samples_per_second": 21.087, "eval_steps_per_second": 0.843, "step": 7274 }, { "epoch": 2.0, "learning_rate": 9.999083493721932e-05, "loss": 0.001, "step": 7280 }, { "epoch": 2.1, "learning_rate": 9.943482112852474e-05, "loss": 0.0012, "step": 7644 }, { "epoch": 2.2, "learning_rate": 9.887880731983015e-05, "loss": 0.001, "step": 8008 }, { "epoch": 2.3, "learning_rate": 9.832279351113555e-05, "loss": 0.0011, "step": 8372 }, { "epoch": 2.4, "learning_rate": 9.776677970244097e-05, "loss": 0.001, "step": 8736 }, { "epoch": 2.5, "learning_rate": 9.721076589374639e-05, "loss": 0.001, "step": 9100 }, { "epoch": 2.6, "learning_rate": 9.665475208505179e-05, "loss": 0.0008, "step": 9464 }, { "epoch": 2.7, "learning_rate": 9.60987382763572e-05, "loss": 0.001, "step": 9828 }, { "epoch": 2.8, "learning_rate": 9.55427244676626e-05, "loss": 0.001, "step": 10192 }, { "epoch": 2.9, "learning_rate": 9.498671065896801e-05, "loss": 0.0011, "step": 10556 }, { "epoch": 3.0, "eval_loss": 1.2921987035952043e-06, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3627, "eval_samples_per_second": 21.162, "eval_steps_per_second": 0.846, "step": 10911 }, { "epoch": 3.0, "learning_rate": 9.443069685027342e-05, "loss": 0.0009, "step": 10920 }, { "epoch": 3.1, "learning_rate": 9.387468304157884e-05, "loss": 0.0008, "step": 11284 }, { "epoch": 3.2, "learning_rate": 9.331866923288425e-05, "loss": 0.001, "step": 11648 }, { "epoch": 3.3, "learning_rate": 9.276265542418966e-05, "loss": 0.0008, "step": 12012 }, { "epoch": 3.4, "learning_rate": 9.220664161549507e-05, "loss": 0.0006, "step": 12376 }, { "epoch": 3.5, "learning_rate": 9.165062780680049e-05, "loss": 0.0008, "step": 12740 }, { "epoch": 3.6, "learning_rate": 9.109461399810589e-05, "loss": 0.0008, "step": 13104 }, { "epoch": 3.7, "learning_rate": 9.05386001894113e-05, "loss": 0.0009, "step": 13468 }, { "epoch": 3.8, "learning_rate": 8.998258638071672e-05, "loss": 0.0007, "step": 13832 }, { "epoch": 3.9, "learning_rate": 8.942657257202212e-05, "loss": 0.001, "step": 14196 }, { "epoch": 4.0, "eval_loss": 2.308262310179998e-06, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.356, "eval_samples_per_second": 21.223, "eval_steps_per_second": 0.849, "step": 14548 }, { "epoch": 4.0, "learning_rate": 8.887055876332754e-05, "loss": 0.0009, "step": 14560 }, { "epoch": 4.1, "learning_rate": 8.831454495463294e-05, "loss": 0.0008, "step": 14924 }, { "epoch": 4.2, "learning_rate": 8.775853114593834e-05, "loss": 0.0006, "step": 15288 }, { "epoch": 4.3, "learning_rate": 8.720251733724376e-05, "loss": 0.0005, "step": 15652 }, { "epoch": 4.4, "learning_rate": 8.664650352854918e-05, "loss": 0.0007, "step": 16016 }, { "epoch": 4.5, "learning_rate": 8.609048971985458e-05, "loss": 0.0007, "step": 16380 }, { "epoch": 4.6, "learning_rate": 8.553447591115999e-05, "loss": 0.0008, "step": 16744 }, { "epoch": 4.7, "learning_rate": 8.497846210246541e-05, "loss": 0.0006, "step": 17108 }, { "epoch": 4.8, "learning_rate": 8.442244829377081e-05, "loss": 0.0007, "step": 17472 }, { "epoch": 4.9, "learning_rate": 8.386643448507623e-05, "loss": 0.0007, "step": 17836 }, { "epoch": 5.0, "eval_loss": 5.198210146772908e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3541, "eval_samples_per_second": 21.24, "eval_steps_per_second": 0.85, "step": 18185 }, { "epoch": 5.0, "learning_rate": 8.331042067638164e-05, "loss": 0.0007, "step": 18200 }, { "epoch": 5.1, "learning_rate": 8.275440686768706e-05, "loss": 0.0006, "step": 18564 }, { "epoch": 5.2, "learning_rate": 8.219839305899246e-05, "loss": 0.0007, "step": 18928 }, { "epoch": 5.3, "learning_rate": 8.164237925029788e-05, "loss": 0.0008, "step": 19292 }, { "epoch": 5.4, "learning_rate": 8.108636544160328e-05, "loss": 0.0007, "step": 19656 }, { "epoch": 5.5, "learning_rate": 8.053035163290868e-05, "loss": 0.0006, "step": 20020 }, { "epoch": 5.6, "learning_rate": 7.99743378242141e-05, "loss": 0.0006, "step": 20384 }, { "epoch": 5.7, "learning_rate": 7.941832401551951e-05, "loss": 0.0007, "step": 20748 }, { "epoch": 5.8, "learning_rate": 7.886231020682491e-05, "loss": 0.0006, "step": 21112 }, { "epoch": 5.9, "learning_rate": 7.830629639813033e-05, "loss": 0.0008, "step": 21476 }, { "epoch": 6.0, "eval_loss": 5.296922722664021e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3254, "eval_samples_per_second": 21.501, "eval_steps_per_second": 0.86, "step": 21822 }, { "epoch": 6.0, "learning_rate": 7.775028258943574e-05, "loss": 0.0007, "step": 21840 }, { "epoch": 6.11, "learning_rate": 7.719426878074115e-05, "loss": 0.0005, "step": 22204 }, { "epoch": 6.21, "learning_rate": 7.663825497204656e-05, "loss": 0.0006, "step": 22568 }, { "epoch": 6.31, "learning_rate": 7.608224116335198e-05, "loss": 0.0006, "step": 22932 }, { "epoch": 6.41, "learning_rate": 7.552622735465738e-05, "loss": 0.0006, "step": 23296 }, { "epoch": 6.51, "learning_rate": 7.49702135459628e-05, "loss": 0.0006, "step": 23660 }, { "epoch": 6.61, "learning_rate": 7.441419973726821e-05, "loss": 0.0006, "step": 24024 }, { "epoch": 6.71, "learning_rate": 7.385818592857361e-05, "loss": 0.0008, "step": 24388 }, { "epoch": 6.81, "learning_rate": 7.330217211987902e-05, "loss": 0.0005, "step": 24752 }, { "epoch": 6.91, "learning_rate": 7.274615831118443e-05, "loss": 0.0006, "step": 25116 }, { "epoch": 7.0, "eval_loss": 8.367381951757125e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3096, "eval_samples_per_second": 21.649, "eval_steps_per_second": 0.866, "step": 25459 }, { "epoch": 7.01, "learning_rate": 7.219014450248985e-05, "loss": 0.0006, "step": 25480 }, { "epoch": 7.11, "learning_rate": 7.163413069379525e-05, "loss": 0.0007, "step": 25844 }, { "epoch": 7.21, "learning_rate": 7.107811688510066e-05, "loss": 0.0005, "step": 26208 }, { "epoch": 7.31, "learning_rate": 7.052210307640608e-05, "loss": 0.0005, "step": 26572 }, { "epoch": 7.41, "learning_rate": 6.996608926771148e-05, "loss": 0.0006, "step": 26936 }, { "epoch": 7.51, "learning_rate": 6.94100754590169e-05, "loss": 0.0004, "step": 27300 }, { "epoch": 7.61, "learning_rate": 6.885406165032231e-05, "loss": 0.0006, "step": 27664 }, { "epoch": 7.71, "learning_rate": 6.829804784162772e-05, "loss": 0.0005, "step": 28028 }, { "epoch": 7.81, "learning_rate": 6.774203403293313e-05, "loss": 0.0006, "step": 28392 }, { "epoch": 7.91, "learning_rate": 6.718602022423855e-05, "loss": 0.0007, "step": 28756 }, { "epoch": 8.0, "eval_loss": 7.714121466051438e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3368, "eval_samples_per_second": 21.397, "eval_steps_per_second": 0.856, "step": 29096 }, { "epoch": 8.01, "learning_rate": 6.663000641554395e-05, "loss": 0.0006, "step": 29120 }, { "epoch": 8.11, "learning_rate": 6.607399260684935e-05, "loss": 0.0004, "step": 29484 }, { "epoch": 8.21, "learning_rate": 6.551797879815477e-05, "loss": 0.0006, "step": 29848 }, { "epoch": 8.31, "learning_rate": 6.496196498946018e-05, "loss": 0.0005, "step": 30212 }, { "epoch": 8.41, "learning_rate": 6.440595118076558e-05, "loss": 0.0006, "step": 30576 }, { "epoch": 8.51, "learning_rate": 6.3849937372071e-05, "loss": 0.0005, "step": 30940 }, { "epoch": 8.61, "learning_rate": 6.329392356337642e-05, "loss": 0.0004, "step": 31304 }, { "epoch": 8.71, "learning_rate": 6.273790975468182e-05, "loss": 0.0005, "step": 31668 }, { "epoch": 8.81, "learning_rate": 6.218189594598723e-05, "loss": 0.0005, "step": 32032 }, { "epoch": 8.91, "learning_rate": 6.162588213729265e-05, "loss": 0.0006, "step": 32396 }, { "epoch": 9.0, "eval_loss": 7.54007260184153e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3672, "eval_samples_per_second": 21.122, "eval_steps_per_second": 0.845, "step": 32733 }, { "epoch": 9.01, "learning_rate": 6.106986832859805e-05, "loss": 0.0005, "step": 32760 }, { "epoch": 9.11, "learning_rate": 6.051385451990347e-05, "loss": 0.0006, "step": 33124 }, { "epoch": 9.21, "learning_rate": 5.9957840711208876e-05, "loss": 0.0005, "step": 33488 }, { "epoch": 9.31, "learning_rate": 5.940182690251429e-05, "loss": 0.0005, "step": 33852 }, { "epoch": 9.41, "learning_rate": 5.884581309381969e-05, "loss": 0.0005, "step": 34216 }, { "epoch": 9.51, "learning_rate": 5.82897992851251e-05, "loss": 0.0005, "step": 34580 }, { "epoch": 9.61, "learning_rate": 5.773378547643051e-05, "loss": 0.0004, "step": 34944 }, { "epoch": 9.71, "learning_rate": 5.717777166773593e-05, "loss": 0.0004, "step": 35308 }, { "epoch": 9.81, "learning_rate": 5.6621757859041336e-05, "loss": 0.0005, "step": 35672 }, { "epoch": 9.91, "learning_rate": 5.6065744050346745e-05, "loss": 0.0004, "step": 36036 }, { "epoch": 10.0, "eval_loss": 1.4673248642793624e-06, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3769, "eval_samples_per_second": 21.036, "eval_steps_per_second": 0.841, "step": 36370 }, { "epoch": 10.01, "learning_rate": 5.550973024165216e-05, "loss": 0.0005, "step": 36400 }, { "epoch": 10.11, "learning_rate": 5.495371643295757e-05, "loss": 0.0005, "step": 36764 }, { "epoch": 10.21, "learning_rate": 5.439770262426298e-05, "loss": 0.0005, "step": 37128 }, { "epoch": 10.31, "learning_rate": 5.3841688815568394e-05, "loss": 0.0004, "step": 37492 }, { "epoch": 10.41, "learning_rate": 5.32856750068738e-05, "loss": 0.0005, "step": 37856 }, { "epoch": 10.51, "learning_rate": 5.272966119817921e-05, "loss": 0.0004, "step": 38220 }, { "epoch": 10.61, "learning_rate": 5.217364738948463e-05, "loss": 0.0005, "step": 38584 }, { "epoch": 10.71, "learning_rate": 5.161763358079002e-05, "loss": 0.0005, "step": 38948 }, { "epoch": 10.81, "learning_rate": 5.106161977209544e-05, "loss": 0.0004, "step": 39312 }, { "epoch": 10.91, "learning_rate": 5.050560596340085e-05, "loss": 0.0005, "step": 39676 }, { "epoch": 11.0, "eval_loss": 8.162444373738253e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3085, "eval_samples_per_second": 21.66, "eval_steps_per_second": 0.866, "step": 40007 }, { "epoch": 11.01, "learning_rate": 4.9949592154706257e-05, "loss": 0.0005, "step": 40040 }, { "epoch": 11.11, "learning_rate": 4.939357834601167e-05, "loss": 0.0004, "step": 40404 }, { "epoch": 11.21, "learning_rate": 4.883756453731708e-05, "loss": 0.0004, "step": 40768 }, { "epoch": 11.31, "learning_rate": 4.828155072862249e-05, "loss": 0.0004, "step": 41132 }, { "epoch": 11.41, "learning_rate": 4.7725536919927906e-05, "loss": 0.0005, "step": 41496 }, { "epoch": 11.51, "learning_rate": 4.7169523111233314e-05, "loss": 0.0005, "step": 41860 }, { "epoch": 11.61, "learning_rate": 4.661350930253873e-05, "loss": 0.0005, "step": 42224 }, { "epoch": 11.71, "learning_rate": 4.605749549384413e-05, "loss": 0.0004, "step": 42588 }, { "epoch": 11.81, "learning_rate": 4.550148168514954e-05, "loss": 0.0004, "step": 42952 }, { "epoch": 11.91, "learning_rate": 4.494546787645496e-05, "loss": 0.0004, "step": 43316 }, { "epoch": 12.0, "eval_loss": 1.4554426570612122e-06, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.31, "eval_samples_per_second": 21.645, "eval_steps_per_second": 0.866, "step": 43644 }, { "epoch": 12.01, "learning_rate": 4.4389454067760366e-05, "loss": 0.0003, "step": 43680 }, { "epoch": 12.11, "learning_rate": 4.3833440259065775e-05, "loss": 0.0004, "step": 44044 }, { "epoch": 12.21, "learning_rate": 4.327742645037119e-05, "loss": 0.0003, "step": 44408 }, { "epoch": 12.31, "learning_rate": 4.27214126416766e-05, "loss": 0.0004, "step": 44772 }, { "epoch": 12.41, "learning_rate": 4.216539883298201e-05, "loss": 0.0004, "step": 45136 }, { "epoch": 12.51, "learning_rate": 4.160938502428742e-05, "loss": 0.0004, "step": 45500 }, { "epoch": 12.61, "learning_rate": 4.1053371215592826e-05, "loss": 0.0004, "step": 45864 }, { "epoch": 12.71, "learning_rate": 4.049735740689824e-05, "loss": 0.0005, "step": 46228 }, { "epoch": 12.81, "learning_rate": 3.994134359820365e-05, "loss": 0.0005, "step": 46592 }, { "epoch": 12.91, "learning_rate": 3.938532978950906e-05, "loss": 0.0004, "step": 46956 }, { "epoch": 13.0, "eval_loss": 8.021904704946792e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3634, "eval_samples_per_second": 21.156, "eval_steps_per_second": 0.846, "step": 47281 }, { "epoch": 13.01, "learning_rate": 3.882931598081447e-05, "loss": 0.0003, "step": 47320 }, { "epoch": 13.11, "learning_rate": 3.827330217211988e-05, "loss": 0.0004, "step": 47684 }, { "epoch": 13.21, "learning_rate": 3.771728836342529e-05, "loss": 0.0004, "step": 48048 }, { "epoch": 13.31, "learning_rate": 3.71612745547307e-05, "loss": 0.0004, "step": 48412 }, { "epoch": 13.41, "learning_rate": 3.660526074603611e-05, "loss": 0.0004, "step": 48776 }, { "epoch": 13.51, "learning_rate": 3.6049246937341526e-05, "loss": 0.0004, "step": 49140 }, { "epoch": 13.61, "learning_rate": 3.5493233128646935e-05, "loss": 0.0004, "step": 49504 }, { "epoch": 13.71, "learning_rate": 3.4937219319952344e-05, "loss": 0.0004, "step": 49868 }, { "epoch": 13.81, "learning_rate": 3.438120551125775e-05, "loss": 0.0004, "step": 50232 }, { "epoch": 13.91, "learning_rate": 3.382519170256316e-05, "loss": 0.0005, "step": 50596 }, { "epoch": 14.0, "eval_loss": 6.187271992530441e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.322, "eval_samples_per_second": 21.533, "eval_steps_per_second": 0.861, "step": 50918 }, { "epoch": 14.01, "learning_rate": 3.326917789386858e-05, "loss": 0.0005, "step": 50960 }, { "epoch": 14.11, "learning_rate": 3.2713164085173986e-05, "loss": 0.0005, "step": 51324 }, { "epoch": 14.21, "learning_rate": 3.2157150276479395e-05, "loss": 0.0003, "step": 51688 }, { "epoch": 14.31, "learning_rate": 3.1601136467784804e-05, "loss": 0.0004, "step": 52052 }, { "epoch": 14.41, "learning_rate": 3.104512265909021e-05, "loss": 0.0004, "step": 52416 }, { "epoch": 14.51, "learning_rate": 3.0489108850395625e-05, "loss": 0.0004, "step": 52780 }, { "epoch": 14.61, "learning_rate": 2.9933095041701037e-05, "loss": 0.0004, "step": 53144 }, { "epoch": 14.71, "learning_rate": 2.937708123300645e-05, "loss": 0.0004, "step": 53508 }, { "epoch": 14.81, "learning_rate": 2.882106742431186e-05, "loss": 0.0004, "step": 53872 }, { "epoch": 14.91, "learning_rate": 2.826505361561727e-05, "loss": 0.0004, "step": 54236 }, { "epoch": 15.0, "eval_loss": 4.629501972885919e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3761, "eval_samples_per_second": 21.043, "eval_steps_per_second": 0.842, "step": 54555 }, { "epoch": 15.01, "learning_rate": 2.7709039806922676e-05, "loss": 0.0005, "step": 54600 }, { "epoch": 15.11, "learning_rate": 2.715302599822809e-05, "loss": 0.0003, "step": 54964 }, { "epoch": 15.21, "learning_rate": 2.6597012189533497e-05, "loss": 0.0003, "step": 55328 }, { "epoch": 15.31, "learning_rate": 2.604099838083891e-05, "loss": 0.0003, "step": 55692 }, { "epoch": 15.41, "learning_rate": 2.5484984572144322e-05, "loss": 0.0003, "step": 56056 }, { "epoch": 15.51, "learning_rate": 2.492897076344973e-05, "loss": 0.0003, "step": 56420 }, { "epoch": 15.61, "learning_rate": 2.437295695475514e-05, "loss": 0.0004, "step": 56784 }, { "epoch": 15.71, "learning_rate": 2.3816943146060552e-05, "loss": 0.0003, "step": 57148 }, { "epoch": 15.81, "learning_rate": 2.326092933736596e-05, "loss": 0.0004, "step": 57512 }, { "epoch": 15.91, "learning_rate": 2.2704915528671373e-05, "loss": 0.0004, "step": 57876 }, { "epoch": 16.0, "eval_loss": 4.1432366515437025e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3527, "eval_samples_per_second": 21.253, "eval_steps_per_second": 0.85, "step": 58192 }, { "epoch": 16.01, "learning_rate": 2.2148901719976782e-05, "loss": 0.0004, "step": 58240 }, { "epoch": 16.11, "learning_rate": 2.1592887911282194e-05, "loss": 0.0004, "step": 58604 }, { "epoch": 16.21, "learning_rate": 2.1036874102587603e-05, "loss": 0.0004, "step": 58968 }, { "epoch": 16.31, "learning_rate": 2.0480860293893016e-05, "loss": 0.0004, "step": 59332 }, { "epoch": 16.41, "learning_rate": 1.9924846485198424e-05, "loss": 0.0003, "step": 59696 }, { "epoch": 16.51, "learning_rate": 1.9368832676503833e-05, "loss": 0.0004, "step": 60060 }, { "epoch": 16.61, "learning_rate": 1.8812818867809246e-05, "loss": 0.0003, "step": 60424 }, { "epoch": 16.71, "learning_rate": 1.8256805059114658e-05, "loss": 0.0004, "step": 60788 }, { "epoch": 16.81, "learning_rate": 1.7700791250420067e-05, "loss": 0.0005, "step": 61152 }, { "epoch": 16.91, "learning_rate": 1.7144777441725476e-05, "loss": 0.0004, "step": 61516 }, { "epoch": 17.0, "eval_loss": 6.78707863244199e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3415, "eval_samples_per_second": 21.354, "eval_steps_per_second": 0.854, "step": 61829 }, { "epoch": 17.01, "learning_rate": 1.6588763633030888e-05, "loss": 0.0004, "step": 61880 }, { "epoch": 17.11, "learning_rate": 1.6032749824336297e-05, "loss": 0.0003, "step": 62244 }, { "epoch": 17.21, "learning_rate": 1.547673601564171e-05, "loss": 0.0003, "step": 62608 }, { "epoch": 17.31, "learning_rate": 1.4920722206947118e-05, "loss": 0.0003, "step": 62972 }, { "epoch": 17.41, "learning_rate": 1.436470839825253e-05, "loss": 0.0003, "step": 63336 }, { "epoch": 17.51, "learning_rate": 1.3808694589557939e-05, "loss": 0.0004, "step": 63700 }, { "epoch": 17.61, "learning_rate": 1.325268078086335e-05, "loss": 0.0004, "step": 64064 }, { "epoch": 17.71, "learning_rate": 1.269666697216876e-05, "loss": 0.0004, "step": 64428 }, { "epoch": 17.81, "learning_rate": 1.2140653163474171e-05, "loss": 0.0003, "step": 64792 }, { "epoch": 17.91, "learning_rate": 1.1584639354779581e-05, "loss": 0.0004, "step": 65156 }, { "epoch": 18.0, "eval_loss": 4.647758657938539e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3851, "eval_samples_per_second": 20.964, "eval_steps_per_second": 0.839, "step": 65466 }, { "epoch": 18.01, "learning_rate": 1.1028625546084992e-05, "loss": 0.0003, "step": 65520 }, { "epoch": 18.11, "learning_rate": 1.0472611737390403e-05, "loss": 0.0003, "step": 65884 }, { "epoch": 18.22, "learning_rate": 9.916597928695811e-06, "loss": 0.0005, "step": 66248 }, { "epoch": 18.32, "learning_rate": 9.360584120001222e-06, "loss": 0.0003, "step": 66612 }, { "epoch": 18.42, "learning_rate": 8.804570311306633e-06, "loss": 0.0003, "step": 66976 }, { "epoch": 18.52, "learning_rate": 8.248556502612043e-06, "loss": 0.0004, "step": 67340 }, { "epoch": 18.62, "learning_rate": 7.692542693917454e-06, "loss": 0.0004, "step": 67704 }, { "epoch": 18.72, "learning_rate": 7.1365288852228635e-06, "loss": 0.0004, "step": 68068 }, { "epoch": 18.82, "learning_rate": 6.580515076528275e-06, "loss": 0.0004, "step": 68432 }, { "epoch": 18.92, "learning_rate": 6.024501267833685e-06, "loss": 0.0004, "step": 68796 }, { "epoch": 19.0, "eval_loss": 6.43872112959798e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3968, "eval_samples_per_second": 20.861, "eval_steps_per_second": 0.834, "step": 69103 }, { "epoch": 19.02, "learning_rate": 5.468487459139095e-06, "loss": 0.0003, "step": 69160 }, { "epoch": 19.12, "learning_rate": 4.912473650444506e-06, "loss": 0.0003, "step": 69524 }, { "epoch": 19.22, "learning_rate": 4.3564598417499164e-06, "loss": 0.0004, "step": 69888 }, { "epoch": 19.32, "learning_rate": 3.8004460330553266e-06, "loss": 0.0003, "step": 70252 }, { "epoch": 19.42, "learning_rate": 3.244432224360737e-06, "loss": 0.0003, "step": 70616 }, { "epoch": 19.52, "learning_rate": 2.6884184156661473e-06, "loss": 0.0004, "step": 70980 }, { "epoch": 19.62, "learning_rate": 2.132404606971558e-06, "loss": 0.0003, "step": 71344 }, { "epoch": 19.72, "learning_rate": 1.5763907982769683e-06, "loss": 0.0003, "step": 71708 }, { "epoch": 19.82, "learning_rate": 1.0203769895823786e-06, "loss": 0.0004, "step": 72072 }, { "epoch": 19.92, "learning_rate": 4.6436318088778905e-07, "loss": 0.0004, "step": 72436 }, { "epoch": 20.0, "eval_loss": 7.036084070932702e-07, "eval_max_distance": 0, "eval_mean_distance": 0, "eval_runtime": 2.3249, "eval_samples_per_second": 21.506, "eval_steps_per_second": 0.86, "step": 72740 }, { "epoch": 20.0, "step": 72740, "total_flos": 4.361497895702938e+16, "train_loss": 0.0006492722850091394, "train_runtime": 7118.3904, "train_samples_per_second": 306.491, "train_steps_per_second": 10.219 } ], "logging_steps": 364, "max_steps": 72740, "num_train_epochs": 20, "save_steps": 728, "total_flos": 4.361497895702938e+16, "trial_name": null, "trial_params": null }