{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 461010, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.8920122621319915e-08, "loss": 14.3504, "step": 1 }, { "epoch": 0.15, "learning_rate": 5.000289201226213e-05, "loss": 2.1001, "step": 1729 }, { "epoch": 0.3, "learning_rate": 0.00010000578402452426, "loss": 0.3756, "step": 3458 }, { "epoch": 0.45, "learning_rate": 0.0001500086760367864, "loss": 0.2527, "step": 5187 }, { "epoch": 0.6, "learning_rate": 0.00020001156804904852, "loss": 0.2076, "step": 6916 }, { "epoch": 0.75, "learning_rate": 0.00025001446006131067, "loss": 0.1864, "step": 8645 }, { "epoch": 0.9, "learning_rate": 0.0003000173520735728, "loss": 0.199, "step": 10374 }, { "epoch": 1.0, "eval_loss": 0.8173184990882874, "eval_max_distance": 167, "eval_mean_distance": 17, "eval_runtime": 64.0638, "eval_samples_per_second": 15.609, "eval_steps_per_second": 0.78, "step": 11526 }, { "epoch": 1.05, "learning_rate": 0.0003500202440858349, "loss": 0.2481, "step": 12103 }, { "epoch": 1.2, "learning_rate": 0.00040002313609809704, "loss": 0.1244, "step": 13832 }, { "epoch": 1.35, "learning_rate": 0.0004500260281103592, "loss": 0.1055, "step": 15561 }, { "epoch": 1.5, "learning_rate": 0.0005000289201226213, "loss": 0.102, "step": 17290 }, { "epoch": 1.65, "learning_rate": 0.0005500318121348835, "loss": 0.102, "step": 19019 }, { "epoch": 1.8, "learning_rate": 0.0006000347041471456, "loss": 0.1083, "step": 20748 }, { "epoch": 1.95, "learning_rate": 0.0006500375961594078, "loss": 0.1286, "step": 22477 }, { "epoch": 2.0, "eval_loss": 0.5452634692192078, "eval_max_distance": 158, "eval_mean_distance": 14, "eval_runtime": 30.4268, "eval_samples_per_second": 32.866, "eval_steps_per_second": 1.643, "step": 23052 }, { "epoch": 2.1, "learning_rate": 0.0007000404881716698, "loss": 0.1449, "step": 24206 }, { "epoch": 2.25, "learning_rate": 0.000750043380183932, "loss": 0.0747, "step": 25935 }, { "epoch": 2.4, "learning_rate": 0.0008000462721961941, "loss": 0.0744, "step": 27664 }, { "epoch": 2.55, "learning_rate": 0.0008500491642084563, "loss": 0.0742, "step": 29393 }, { "epoch": 2.7, "learning_rate": 0.0009000520562207184, "loss": 0.0792, "step": 31122 }, { "epoch": 2.85, "learning_rate": 0.0009500549482329805, "loss": 0.0891, "step": 32851 }, { "epoch": 3.0, "eval_loss": 0.3629104495048523, "eval_max_distance": 122, "eval_mean_distance": 10, "eval_runtime": 35.1149, "eval_samples_per_second": 28.478, "eval_steps_per_second": 1.424, "step": 34578 }, { "epoch": 3.0, "learning_rate": 0.0009999935733060843, "loss": 0.1079, "step": 34580 }, { "epoch": 3.15, "learning_rate": 0.000994437696415833, "loss": 0.0962, "step": 36309 }, { "epoch": 3.3, "learning_rate": 0.0009888818195255813, "loss": 0.059, "step": 38038 }, { "epoch": 3.45, "learning_rate": 0.0009833259426353302, "loss": 0.0576, "step": 39767 }, { "epoch": 3.6, "learning_rate": 0.0009777700657450789, "loss": 0.058, "step": 41496 }, { "epoch": 3.75, "learning_rate": 0.0009722141888548275, "loss": 0.0611, "step": 43225 }, { "epoch": 3.9, "learning_rate": 0.0009666583119645761, "loss": 0.0711, "step": 44954 }, { "epoch": 4.0, "eval_loss": 0.4011004865169525, "eval_max_distance": 114, "eval_mean_distance": 12, "eval_runtime": 24.9859, "eval_samples_per_second": 40.022, "eval_steps_per_second": 2.001, "step": 46104 }, { "epoch": 4.05, "learning_rate": 0.0009611024350743247, "loss": 0.0991, "step": 46683 }, { "epoch": 4.2, "learning_rate": 0.0009555465581840734, "loss": 0.0481, "step": 48412 }, { "epoch": 4.35, "learning_rate": 0.0009499906812938221, "loss": 0.044, "step": 50141 }, { "epoch": 4.5, "learning_rate": 0.0009444348044035706, "loss": 0.0435, "step": 51870 }, { "epoch": 4.65, "learning_rate": 0.0009388789275133194, "loss": 0.0454, "step": 53599 }, { "epoch": 4.8, "learning_rate": 0.000933323050623068, "loss": 0.0483, "step": 55328 }, { "epoch": 4.95, "learning_rate": 0.0009277671737328166, "loss": 0.0566, "step": 57057 }, { "epoch": 5.0, "eval_loss": 0.29974234104156494, "eval_max_distance": 100, "eval_mean_distance": 7, "eval_runtime": 25.609, "eval_samples_per_second": 39.049, "eval_steps_per_second": 1.952, "step": 57630 }, { "epoch": 5.1, "learning_rate": 0.0009222112968425653, "loss": 0.0724, "step": 58786 }, { "epoch": 5.25, "learning_rate": 0.000916655419952314, "loss": 0.0364, "step": 60515 }, { "epoch": 5.4, "learning_rate": 0.0009110995430620625, "loss": 0.035, "step": 62244 }, { "epoch": 5.55, "learning_rate": 0.0009055436661718113, "loss": 0.0353, "step": 63973 }, { "epoch": 5.7, "learning_rate": 0.0008999877892815599, "loss": 0.0364, "step": 65702 }, { "epoch": 5.85, "learning_rate": 0.0008944319123913086, "loss": 0.0402, "step": 67431 }, { "epoch": 6.0, "eval_loss": 0.15522713959217072, "eval_max_distance": 75, "eval_mean_distance": 4, "eval_runtime": 24.8618, "eval_samples_per_second": 40.222, "eval_steps_per_second": 2.011, "step": 69156 }, { "epoch": 6.0, "learning_rate": 0.0008888760355010572, "loss": 0.0506, "step": 69160 }, { "epoch": 6.15, "learning_rate": 0.0008833201586108059, "loss": 0.0514, "step": 70889 }, { "epoch": 6.3, "learning_rate": 0.0008777642817205546, "loss": 0.0298, "step": 72618 }, { "epoch": 6.45, "learning_rate": 0.0008722084048303031, "loss": 0.0287, "step": 74347 }, { "epoch": 6.6, "learning_rate": 0.0008666525279400518, "loss": 0.0289, "step": 76076 }, { "epoch": 6.75, "learning_rate": 0.0008610966510498005, "loss": 0.0303, "step": 77805 }, { "epoch": 6.9, "learning_rate": 0.0008555407741595491, "loss": 0.0348, "step": 79534 }, { "epoch": 7.0, "eval_loss": 0.15130603313446045, "eval_max_distance": 79, "eval_mean_distance": 3, "eval_runtime": 25.5455, "eval_samples_per_second": 39.146, "eval_steps_per_second": 1.957, "step": 80682 }, { "epoch": 7.05, "learning_rate": 0.0008499848972692977, "loss": 0.0527, "step": 81263 }, { "epoch": 7.2, "learning_rate": 0.0008444290203790465, "loss": 0.0293, "step": 82992 }, { "epoch": 7.35, "learning_rate": 0.000838873143488795, "loss": 0.0247, "step": 84721 }, { "epoch": 7.5, "learning_rate": 0.0008333172665985437, "loss": 0.024, "step": 86450 }, { "epoch": 7.65, "learning_rate": 0.0008277613897082924, "loss": 0.0243, "step": 88179 }, { "epoch": 7.8, "learning_rate": 0.000822205512818041, "loss": 0.0263, "step": 89908 }, { "epoch": 7.95, "learning_rate": 0.0008166496359277897, "loss": 0.0302, "step": 91637 }, { "epoch": 8.0, "eval_loss": 0.14522188901901245, "eval_max_distance": 76, "eval_mean_distance": 3, "eval_runtime": 25.6271, "eval_samples_per_second": 39.021, "eval_steps_per_second": 1.951, "step": 92208 }, { "epoch": 8.1, "learning_rate": 0.0008110937590375384, "loss": 0.0445, "step": 93366 }, { "epoch": 8.25, "learning_rate": 0.0008055378821472869, "loss": 0.0229, "step": 95095 }, { "epoch": 8.4, "learning_rate": 0.0007999820052570357, "loss": 0.0207, "step": 96824 }, { "epoch": 8.55, "learning_rate": 0.0007944261283667843, "loss": 0.0203, "step": 98553 }, { "epoch": 8.7, "learning_rate": 0.000788870251476533, "loss": 0.021, "step": 100282 }, { "epoch": 8.85, "learning_rate": 0.0007833143745862816, "loss": 0.0223, "step": 102011 }, { "epoch": 9.0, "eval_loss": 0.08658243715763092, "eval_max_distance": 76, "eval_mean_distance": 1, "eval_runtime": 23.498, "eval_samples_per_second": 42.557, "eval_steps_per_second": 2.128, "step": 103734 }, { "epoch": 9.0, "learning_rate": 0.0007777584976960303, "loss": 0.0284, "step": 103740 }, { "epoch": 9.15, "learning_rate": 0.0007722026208057789, "loss": 0.034, "step": 105469 }, { "epoch": 9.3, "learning_rate": 0.0007666467439155275, "loss": 0.0193, "step": 107198 }, { "epoch": 9.45, "learning_rate": 0.0007610908670252762, "loss": 0.0175, "step": 108927 }, { "epoch": 9.6, "learning_rate": 0.0007555349901350248, "loss": 0.0172, "step": 110656 }, { "epoch": 9.75, "learning_rate": 0.0007499791132447735, "loss": 0.018, "step": 112385 }, { "epoch": 9.9, "learning_rate": 0.0007444232363545221, "loss": 0.0202, "step": 114114 }, { "epoch": 10.0, "eval_loss": 0.10908353328704834, "eval_max_distance": 71, "eval_mean_distance": 2, "eval_runtime": 25.0909, "eval_samples_per_second": 39.855, "eval_steps_per_second": 1.993, "step": 115260 }, { "epoch": 10.05, "learning_rate": 0.0007388673594642709, "loss": 0.0342, "step": 115843 }, { "epoch": 10.2, "learning_rate": 0.0007333114825740194, "loss": 0.0202, "step": 117572 }, { "epoch": 10.35, "learning_rate": 0.0007277556056837681, "loss": 0.0159, "step": 119301 }, { "epoch": 10.5, "learning_rate": 0.0007221997287935168, "loss": 0.0149, "step": 121030 }, { "epoch": 10.65, "learning_rate": 0.0007166438519032654, "loss": 0.015, "step": 122759 }, { "epoch": 10.8, "learning_rate": 0.000711087975013014, "loss": 0.0155, "step": 124488 }, { "epoch": 10.95, "learning_rate": 0.0007055320981227628, "loss": 0.0175, "step": 126217 }, { "epoch": 11.0, "eval_loss": 0.06553788483142853, "eval_max_distance": 66, "eval_mean_distance": 1, "eval_runtime": 22.988, "eval_samples_per_second": 43.501, "eval_steps_per_second": 2.175, "step": 126786 }, { "epoch": 11.1, "learning_rate": 0.0006999762212325114, "loss": 0.0305, "step": 127946 }, { "epoch": 11.25, "learning_rate": 0.00069442034434226, "loss": 0.0157, "step": 129675 }, { "epoch": 11.4, "learning_rate": 0.0006888644674520087, "loss": 0.0134, "step": 131404 }, { "epoch": 11.55, "learning_rate": 0.0006833085905617574, "loss": 0.0124, "step": 133133 }, { "epoch": 11.7, "learning_rate": 0.0006777527136715059, "loss": 0.0131, "step": 134862 }, { "epoch": 11.85, "learning_rate": 0.0006721968367812547, "loss": 0.014, "step": 136591 }, { "epoch": 12.0, "eval_loss": 0.04735955968499184, "eval_max_distance": 44, "eval_mean_distance": 0, "eval_runtime": 22.5236, "eval_samples_per_second": 44.398, "eval_steps_per_second": 2.22, "step": 138312 }, { "epoch": 12.0, "learning_rate": 0.0006666409598910033, "loss": 0.0182, "step": 138320 }, { "epoch": 12.15, "learning_rate": 0.000661085083000752, "loss": 0.025, "step": 140049 }, { "epoch": 12.3, "learning_rate": 0.0006555292061105006, "loss": 0.0128, "step": 141778 }, { "epoch": 12.45, "learning_rate": 0.0006499733292202492, "loss": 0.0114, "step": 143507 }, { "epoch": 12.6, "learning_rate": 0.0006444174523299979, "loss": 0.011, "step": 145236 }, { "epoch": 12.75, "learning_rate": 0.0006388615754397465, "loss": 0.0117, "step": 146965 }, { "epoch": 12.9, "learning_rate": 0.0006333056985494952, "loss": 0.0122, "step": 148694 }, { "epoch": 13.0, "eval_loss": 0.05152251571416855, "eval_max_distance": 42, "eval_mean_distance": 0, "eval_runtime": 23.898, "eval_samples_per_second": 41.845, "eval_steps_per_second": 2.092, "step": 149838 }, { "epoch": 13.05, "learning_rate": 0.0006277498216592438, "loss": 0.0241, "step": 150423 }, { "epoch": 13.2, "learning_rate": 0.0006221939447689925, "loss": 0.0148, "step": 152152 }, { "epoch": 13.35, "learning_rate": 0.0006166380678787411, "loss": 0.0106, "step": 153881 }, { "epoch": 13.5, "learning_rate": 0.0006110821909884899, "loss": 0.0096, "step": 155610 }, { "epoch": 13.65, "learning_rate": 0.0006055263140982384, "loss": 0.0098, "step": 157339 }, { "epoch": 13.8, "learning_rate": 0.0005999704372079872, "loss": 0.0104, "step": 159068 }, { "epoch": 13.95, "learning_rate": 0.0005944145603177358, "loss": 0.0117, "step": 160797 }, { "epoch": 14.0, "eval_loss": 0.047906968742609024, "eval_max_distance": 30, "eval_mean_distance": 0, "eval_runtime": 23.3531, "eval_samples_per_second": 42.821, "eval_steps_per_second": 2.141, "step": 161364 }, { "epoch": 14.1, "learning_rate": 0.0005888586834274844, "loss": 0.0224, "step": 162526 }, { "epoch": 14.25, "learning_rate": 0.0005833028065372331, "loss": 0.0111, "step": 164255 }, { "epoch": 14.4, "learning_rate": 0.0005777469296469818, "loss": 0.009, "step": 165984 }, { "epoch": 14.55, "learning_rate": 0.0005721910527567303, "loss": 0.0086, "step": 167713 }, { "epoch": 14.7, "learning_rate": 0.000566635175866479, "loss": 0.0088, "step": 169442 }, { "epoch": 14.85, "learning_rate": 0.0005610792989762277, "loss": 0.0093, "step": 171171 }, { "epoch": 15.0, "eval_loss": 0.05651288107037544, "eval_max_distance": 56, "eval_mean_distance": 0, "eval_runtime": 23.6545, "eval_samples_per_second": 42.275, "eval_steps_per_second": 2.114, "step": 172890 }, { "epoch": 15.0, "learning_rate": 0.0005555234220859762, "loss": 0.0124, "step": 172900 }, { "epoch": 15.15, "learning_rate": 0.000549967545195725, "loss": 0.0181, "step": 174629 }, { "epoch": 15.3, "learning_rate": 0.0005444116683054736, "loss": 0.0091, "step": 176358 }, { "epoch": 15.45, "learning_rate": 0.0005388557914152222, "loss": 0.0075, "step": 178087 }, { "epoch": 15.6, "learning_rate": 0.0005332999145249709, "loss": 0.0075, "step": 179816 }, { "epoch": 15.75, "learning_rate": 0.0005277440376347196, "loss": 0.0075, "step": 181545 }, { "epoch": 15.9, "learning_rate": 0.0005221881607444683, "loss": 0.0085, "step": 183274 }, { "epoch": 16.0, "eval_loss": 0.047154366970062256, "eval_max_distance": 34, "eval_mean_distance": 0, "eval_runtime": 23.554, "eval_samples_per_second": 42.456, "eval_steps_per_second": 2.123, "step": 184416 }, { "epoch": 16.05, "learning_rate": 0.0005166322838542169, "loss": 0.0169, "step": 185003 }, { "epoch": 16.2, "learning_rate": 0.0005110764069639655, "loss": 0.0104, "step": 186732 }, { "epoch": 16.35, "learning_rate": 0.0005055205300737143, "loss": 0.0072, "step": 188461 }, { "epoch": 16.5, "learning_rate": 0.0004999646531834628, "loss": 0.0068, "step": 190190 }, { "epoch": 16.65, "learning_rate": 0.0004944087762932115, "loss": 0.0064, "step": 191919 }, { "epoch": 16.8, "learning_rate": 0.0004888528994029601, "loss": 0.0068, "step": 193648 }, { "epoch": 16.95, "learning_rate": 0.0004832970225127088, "loss": 0.0075, "step": 195377 }, { "epoch": 17.0, "eval_loss": 0.04200902581214905, "eval_max_distance": 28, "eval_mean_distance": 0, "eval_runtime": 22.1607, "eval_samples_per_second": 45.125, "eval_steps_per_second": 2.256, "step": 195942 }, { "epoch": 17.1, "learning_rate": 0.00047774114562245746, "loss": 0.0162, "step": 197106 }, { "epoch": 17.25, "learning_rate": 0.00047218526873220605, "loss": 0.0077, "step": 198835 }, { "epoch": 17.4, "learning_rate": 0.00046662939184195475, "loss": 0.0063, "step": 200564 }, { "epoch": 17.55, "learning_rate": 0.0004610735149517034, "loss": 0.0058, "step": 202293 }, { "epoch": 17.7, "learning_rate": 0.0004555176380614521, "loss": 0.0057, "step": 204022 }, { "epoch": 17.85, "learning_rate": 0.0004499617611712007, "loss": 0.0059, "step": 205751 }, { "epoch": 18.0, "eval_loss": 0.04149915650486946, "eval_max_distance": 32, "eval_mean_distance": 0, "eval_runtime": 22.9895, "eval_samples_per_second": 43.498, "eval_steps_per_second": 2.175, "step": 207468 }, { "epoch": 18.0, "learning_rate": 0.00044440588428094934, "loss": 0.0082, "step": 207480 }, { "epoch": 18.15, "learning_rate": 0.00043885000739069804, "loss": 0.0133, "step": 209209 }, { "epoch": 18.3, "learning_rate": 0.00043329413050044663, "loss": 0.0063, "step": 210938 }, { "epoch": 18.45, "learning_rate": 0.0004277382536101953, "loss": 0.0051, "step": 212667 }, { "epoch": 18.6, "learning_rate": 0.000422182376719944, "loss": 0.0051, "step": 214396 }, { "epoch": 18.75, "learning_rate": 0.00041662649982969263, "loss": 0.0051, "step": 216125 }, { "epoch": 18.9, "learning_rate": 0.0004110706229394413, "loss": 0.0054, "step": 217854 }, { "epoch": 19.0, "eval_loss": 0.0405677855014801, "eval_max_distance": 28, "eval_mean_distance": 0, "eval_runtime": 21.777, "eval_samples_per_second": 45.92, "eval_steps_per_second": 2.296, "step": 218994 }, { "epoch": 19.05, "learning_rate": 0.0004055147460491899, "loss": 0.0117, "step": 219583 }, { "epoch": 19.2, "learning_rate": 0.00039995886915893857, "loss": 0.0075, "step": 221312 }, { "epoch": 19.35, "learning_rate": 0.00039440299226868727, "loss": 0.0051, "step": 223041 }, { "epoch": 19.5, "learning_rate": 0.00038884711537843586, "loss": 0.0046, "step": 224770 }, { "epoch": 19.65, "learning_rate": 0.0003832912384881845, "loss": 0.0043, "step": 226499 }, { "epoch": 19.8, "learning_rate": 0.0003777353615979332, "loss": 0.0044, "step": 228228 }, { "epoch": 19.95, "learning_rate": 0.0003721794847076818, "loss": 0.0046, "step": 229957 }, { "epoch": 20.0, "eval_loss": 0.03926468640565872, "eval_max_distance": 24, "eval_mean_distance": 0, "eval_runtime": 23.1294, "eval_samples_per_second": 43.235, "eval_steps_per_second": 2.162, "step": 230520 }, { "epoch": 20.1, "learning_rate": 0.0003666236078174305, "loss": 0.0118, "step": 231686 }, { "epoch": 20.25, "learning_rate": 0.00036106773092717915, "loss": 0.0056, "step": 233415 }, { "epoch": 20.4, "learning_rate": 0.0003555118540369278, "loss": 0.0043, "step": 235144 }, { "epoch": 20.55, "learning_rate": 0.00034995597714667645, "loss": 0.0039, "step": 236873 }, { "epoch": 20.7, "learning_rate": 0.0003444001002564251, "loss": 0.0037, "step": 238602 }, { "epoch": 20.85, "learning_rate": 0.00033884422336617374, "loss": 0.004, "step": 240331 }, { "epoch": 21.0, "eval_loss": 0.04168349876999855, "eval_max_distance": 24, "eval_mean_distance": 0, "eval_runtime": 22.786, "eval_samples_per_second": 43.887, "eval_steps_per_second": 2.194, "step": 242046 }, { "epoch": 21.0, "learning_rate": 0.0003332883464759224, "loss": 0.0053, "step": 242060 }, { "epoch": 21.15, "learning_rate": 0.00032773246958567103, "loss": 0.0097, "step": 243789 }, { "epoch": 21.3, "learning_rate": 0.00032217659269541973, "loss": 0.0044, "step": 245518 }, { "epoch": 21.45, "learning_rate": 0.0003166207158051684, "loss": 0.0036, "step": 247247 }, { "epoch": 21.6, "learning_rate": 0.000311064838914917, "loss": 0.0031, "step": 248976 }, { "epoch": 21.75, "learning_rate": 0.0003055089620246657, "loss": 0.0031, "step": 250705 }, { "epoch": 21.9, "learning_rate": 0.0002999530851344143, "loss": 0.0034, "step": 252434 }, { "epoch": 22.0, "eval_loss": 0.040287140756845474, "eval_max_distance": 18, "eval_mean_distance": 0, "eval_runtime": 21.6664, "eval_samples_per_second": 46.155, "eval_steps_per_second": 2.308, "step": 253572 }, { "epoch": 22.05, "learning_rate": 0.0002943972082441629, "loss": 0.0082, "step": 254163 }, { "epoch": 22.2, "learning_rate": 0.0002888413313539116, "loss": 0.0053, "step": 255892 }, { "epoch": 22.35, "learning_rate": 0.00028328545446366026, "loss": 0.0034, "step": 257621 }, { "epoch": 22.5, "learning_rate": 0.00027772957757340896, "loss": 0.0031, "step": 259350 }, { "epoch": 22.65, "learning_rate": 0.00027217370068315756, "loss": 0.0029, "step": 261079 }, { "epoch": 22.8, "learning_rate": 0.0002666178237929062, "loss": 0.0027, "step": 262808 }, { "epoch": 22.95, "learning_rate": 0.0002610619469026549, "loss": 0.0029, "step": 264537 }, { "epoch": 23.0, "eval_loss": 0.04222797229886055, "eval_max_distance": 21, "eval_mean_distance": 0, "eval_runtime": 22.874, "eval_samples_per_second": 43.718, "eval_steps_per_second": 2.186, "step": 265098 }, { "epoch": 23.1, "learning_rate": 0.0002555060700124035, "loss": 0.0081, "step": 266266 }, { "epoch": 23.25, "learning_rate": 0.00024995019312215214, "loss": 0.0038, "step": 267995 }, { "epoch": 23.4, "learning_rate": 0.00024439431623190085, "loss": 0.0028, "step": 269724 }, { "epoch": 23.55, "learning_rate": 0.00023883843934164947, "loss": 0.0025, "step": 271453 }, { "epoch": 23.7, "learning_rate": 0.00023328256245139814, "loss": 0.0026, "step": 273182 }, { "epoch": 23.85, "learning_rate": 0.0002277266855611468, "loss": 0.0024, "step": 274911 }, { "epoch": 24.0, "eval_loss": 0.04101773351430893, "eval_max_distance": 21, "eval_mean_distance": 0, "eval_runtime": 22.8008, "eval_samples_per_second": 43.858, "eval_steps_per_second": 2.193, "step": 276624 }, { "epoch": 24.0, "learning_rate": 0.00022217080867089543, "loss": 0.0033, "step": 276640 }, { "epoch": 24.15, "learning_rate": 0.00021661493178064408, "loss": 0.0069, "step": 278369 }, { "epoch": 24.3, "learning_rate": 0.00021105905489039273, "loss": 0.0029, "step": 280098 }, { "epoch": 24.45, "learning_rate": 0.0002055031780001414, "loss": 0.0024, "step": 281827 }, { "epoch": 24.6, "learning_rate": 0.00019994730110989005, "loss": 0.0021, "step": 283556 }, { "epoch": 24.75, "learning_rate": 0.0001943914242196387, "loss": 0.002, "step": 285285 }, { "epoch": 24.9, "learning_rate": 0.00018883554732938734, "loss": 0.002, "step": 287014 }, { "epoch": 25.0, "eval_loss": 0.043479613959789276, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 21.481, "eval_samples_per_second": 46.553, "eval_steps_per_second": 2.328, "step": 288150 }, { "epoch": 25.05, "learning_rate": 0.000183279670439136, "loss": 0.0055, "step": 288743 }, { "epoch": 25.2, "learning_rate": 0.00017772379354888466, "loss": 0.0038, "step": 290472 }, { "epoch": 25.35, "learning_rate": 0.00017216791665863328, "loss": 0.0023, "step": 292201 }, { "epoch": 25.5, "learning_rate": 0.00016661203976838196, "loss": 0.002, "step": 293930 }, { "epoch": 25.65, "learning_rate": 0.0001610561628781306, "loss": 0.0017, "step": 295659 }, { "epoch": 25.8, "learning_rate": 0.00015550028598787928, "loss": 0.0018, "step": 297388 }, { "epoch": 25.95, "learning_rate": 0.0001499444090976279, "loss": 0.0016, "step": 299117 }, { "epoch": 26.0, "eval_loss": 0.04517431557178497, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 22.7192, "eval_samples_per_second": 44.016, "eval_steps_per_second": 2.201, "step": 299676 }, { "epoch": 26.1, "learning_rate": 0.00014438853220737654, "loss": 0.0054, "step": 300846 }, { "epoch": 26.25, "learning_rate": 0.00013883265531712522, "loss": 0.0026, "step": 302575 }, { "epoch": 26.4, "learning_rate": 0.00013327677842687387, "loss": 0.0018, "step": 304304 }, { "epoch": 26.55, "learning_rate": 0.0001277209015366225, "loss": 0.0016, "step": 306033 }, { "epoch": 26.7, "learning_rate": 0.00012216502464637116, "loss": 0.0015, "step": 307762 }, { "epoch": 26.85, "learning_rate": 0.00011660914775611982, "loss": 0.0013, "step": 309491 }, { "epoch": 27.0, "eval_loss": 0.04144546017050743, "eval_max_distance": 14, "eval_mean_distance": 0, "eval_runtime": 22.686, "eval_samples_per_second": 44.08, "eval_steps_per_second": 2.204, "step": 311202 }, { "epoch": 27.0, "learning_rate": 0.00011105327086586847, "loss": 0.0021, "step": 311220 }, { "epoch": 27.15, "learning_rate": 0.00010549739397561713, "loss": 0.0046, "step": 312949 }, { "epoch": 27.3, "learning_rate": 9.994151708536577e-05, "loss": 0.002, "step": 314678 }, { "epoch": 27.45, "learning_rate": 9.438564019511443e-05, "loss": 0.0016, "step": 316407 }, { "epoch": 27.6, "learning_rate": 8.882976330486308e-05, "loss": 0.0013, "step": 318136 }, { "epoch": 27.75, "learning_rate": 8.327388641461173e-05, "loss": 0.0013, "step": 319865 }, { "epoch": 27.9, "learning_rate": 7.771800952436038e-05, "loss": 0.0012, "step": 321594 }, { "epoch": 28.0, "eval_loss": 0.04387975484132767, "eval_max_distance": 14, "eval_mean_distance": 0, "eval_runtime": 21.586, "eval_samples_per_second": 46.326, "eval_steps_per_second": 2.316, "step": 322728 }, { "epoch": 28.05, "learning_rate": 7.216213263410904e-05, "loss": 0.0037, "step": 323323 }, { "epoch": 28.2, "learning_rate": 6.660625574385768e-05, "loss": 0.0027, "step": 325052 }, { "epoch": 28.35, "learning_rate": 6.105037885360634e-05, "loss": 0.0016, "step": 326781 }, { "epoch": 28.5, "learning_rate": 5.5494501963355e-05, "loss": 0.0013, "step": 328510 }, { "epoch": 28.65, "learning_rate": 4.9938625073103644e-05, "loss": 0.0011, "step": 330239 }, { "epoch": 28.8, "learning_rate": 4.43827481828523e-05, "loss": 0.001, "step": 331968 }, { "epoch": 28.95, "learning_rate": 3.882687129260095e-05, "loss": 0.001, "step": 333697 }, { "epoch": 29.0, "eval_loss": 0.044395141303539276, "eval_max_distance": 15, "eval_mean_distance": 0, "eval_runtime": 22.6289, "eval_samples_per_second": 44.191, "eval_steps_per_second": 2.21, "step": 334254 }, { "epoch": 29.1, "learning_rate": 3.32709944023496e-05, "loss": 0.0039, "step": 335426 }, { "epoch": 29.25, "learning_rate": 2.771511751209825e-05, "loss": 0.0017, "step": 337155 }, { "epoch": 29.4, "learning_rate": 2.2159240621846906e-05, "loss": 0.0012, "step": 338884 }, { "epoch": 29.55, "learning_rate": 1.6603363731595556e-05, "loss": 0.0011, "step": 340613 }, { "epoch": 29.7, "learning_rate": 1.1047486841344207e-05, "loss": 0.001, "step": 342342 }, { "epoch": 29.85, "learning_rate": 0.0004477394104151002, "loss": 0.0026, "step": 344071 }, { "epoch": 30.0, "eval_loss": 0.042660146951675415, "eval_max_distance": 19, "eval_mean_distance": 0, "eval_runtime": 48.9711, "eval_samples_per_second": 20.42, "eval_steps_per_second": 1.021, "step": 345780 }, { "epoch": 30.0, "learning_rate": 0.00044440588428094934, "loss": 0.0045, "step": 345800 }, { "epoch": 30.15, "learning_rate": 0.00044107235814679855, "loss": 0.0068, "step": 347529 }, { "epoch": 30.3, "learning_rate": 0.00043773883201264776, "loss": 0.0036, "step": 349258 }, { "epoch": 30.45, "learning_rate": 0.0004344053058784969, "loss": 0.0032, "step": 350987 }, { "epoch": 30.6, "learning_rate": 0.0004310717797443461, "loss": 0.0033, "step": 352716 }, { "epoch": 30.75, "learning_rate": 0.0004277382536101953, "loss": 0.0034, "step": 354445 }, { "epoch": 30.9, "learning_rate": 0.00042440472747604454, "loss": 0.004, "step": 356174 }, { "epoch": 23.29, "learning_rate": 0.000593547500777279, "loss": 0.0057, "step": 357903 }, { "epoch": 23.4, "learning_rate": 0.0005910471934809801, "loss": 0.0053, "step": 359632 }, { "epoch": 23.52, "learning_rate": 0.0005885468861846814, "loss": 0.0055, "step": 361361 }, { "epoch": 23.63, "learning_rate": 0.0005860465788883828, "loss": 0.0056, "step": 363090 }, { "epoch": 23.74, "learning_rate": 0.0005835462715920841, "loss": 0.006, "step": 364819 }, { "epoch": 23.85, "learning_rate": 0.0005810459642957853, "loss": 0.0064, "step": 366548 }, { "epoch": 23.97, "learning_rate": 0.0005785456569994867, "loss": 0.0077, "step": 368277 }, { "epoch": 24.0, "eval_loss": 0.049542564898729324, "eval_max_distance": 27, "eval_mean_distance": 0, "eval_runtime": 72.4399, "eval_samples_per_second": 13.805, "eval_steps_per_second": 0.925, "step": 368808 }, { "epoch": 24.08, "learning_rate": 0.0005760453497031879, "loss": 0.0158, "step": 370006 }, { "epoch": 24.19, "learning_rate": 0.0005735450424068892, "loss": 0.0082, "step": 371735 }, { "epoch": 24.3, "learning_rate": 0.0005710447351105905, "loss": 0.0057, "step": 373464 }, { "epoch": 24.42, "learning_rate": 0.0005685444278142918, "loss": 0.0059, "step": 375193 }, { "epoch": 24.53, "learning_rate": 0.0005660441205179932, "loss": 0.0055, "step": 376922 }, { "epoch": 24.64, "learning_rate": 0.0005635438132216944, "loss": 0.0062, "step": 378651 }, { "epoch": 24.75, "learning_rate": 0.0005610435059253957, "loss": 0.0059, "step": 380380 }, { "epoch": 24.87, "learning_rate": 0.000558543198629097, "loss": 0.0062, "step": 382109 }, { "epoch": 24.98, "learning_rate": 0.0005560428913327982, "loss": 0.0083, "step": 383838 }, { "epoch": 25.0, "eval_loss": 0.044624801725149155, "eval_max_distance": 37, "eval_mean_distance": 0, "eval_runtime": 35.859, "eval_samples_per_second": 27.887, "eval_steps_per_second": 1.868, "step": 384175 }, { "epoch": 25.09, "learning_rate": 0.0005535425840364996, "loss": 0.0154, "step": 385567 }, { "epoch": 25.2, "learning_rate": 0.0005510422767402009, "loss": 0.0075, "step": 387296 }, { "epoch": 25.32, "learning_rate": 0.0005485419694439023, "loss": 0.006, "step": 389025 }, { "epoch": 25.43, "learning_rate": 0.0005460416621476034, "loss": 0.0057, "step": 390754 }, { "epoch": 25.54, "learning_rate": 0.0005435413548513047, "loss": 0.0057, "step": 392483 }, { "epoch": 25.65, "learning_rate": 0.0005410410475550061, "loss": 0.0056, "step": 394212 }, { "epoch": 25.77, "learning_rate": 0.0005385407402587073, "loss": 0.0059, "step": 395941 }, { "epoch": 25.88, "learning_rate": 0.0005360404329624087, "loss": 0.0067, "step": 397670 }, { "epoch": 25.99, "learning_rate": 0.0005335401256661099, "loss": 0.0078, "step": 399399 }, { "epoch": 26.0, "eval_loss": 0.048115409910678864, "eval_max_distance": 47, "eval_mean_distance": 0, "eval_runtime": 35.8872, "eval_samples_per_second": 27.865, "eval_steps_per_second": 1.867, "step": 399542 }, { "epoch": 26.1, "learning_rate": 0.0005310398183698112, "loss": 0.0151, "step": 401128 }, { "epoch": 26.22, "learning_rate": 0.0005285395110735125, "loss": 0.0068, "step": 402857 }, { "epoch": 26.33, "learning_rate": 0.0005260392037772138, "loss": 0.0055, "step": 404586 }, { "epoch": 26.44, "learning_rate": 0.0005235388964809152, "loss": 0.0052, "step": 406315 }, { "epoch": 26.55, "learning_rate": 0.0005210385891846165, "loss": 0.0054, "step": 408044 }, { "epoch": 26.67, "learning_rate": 0.0005185382818883176, "loss": 0.0053, "step": 409773 }, { "epoch": 26.78, "learning_rate": 0.000516037974592019, "loss": 0.0052, "step": 411502 }, { "epoch": 26.89, "learning_rate": 0.0005135376672957203, "loss": 0.006, "step": 413231 }, { "epoch": 27.0, "eval_loss": 0.042408570647239685, "eval_max_distance": 37, "eval_mean_distance": 0, "eval_runtime": 14.3824, "eval_samples_per_second": 69.529, "eval_steps_per_second": 4.658, "step": 414909 }, { "epoch": 27.0, "learning_rate": 0.0005110373599994215, "loss": 0.0095, "step": 414960 }, { "epoch": 27.12, "learning_rate": 0.0005085370527031229, "loss": 0.0115, "step": 416689 }, { "epoch": 27.23, "learning_rate": 0.0005060367454068242, "loss": 0.006, "step": 418418 }, { "epoch": 27.34, "learning_rate": 0.0005035364381105254, "loss": 0.0051, "step": 420147 }, { "epoch": 27.45, "learning_rate": 0.0005010361308142267, "loss": 0.0047, "step": 421876 }, { "epoch": 27.57, "learning_rate": 0.000498535823517928, "loss": 0.0047, "step": 423605 }, { "epoch": 27.68, "learning_rate": 0.0004960355162216294, "loss": 0.0054, "step": 425334 }, { "epoch": 27.79, "learning_rate": 0.0004935352089253306, "loss": 0.0052, "step": 427063 }, { "epoch": 27.9, "learning_rate": 0.000491034901629032, "loss": 0.0056, "step": 428792 }, { "epoch": 28.0, "eval_loss": 0.04393070191144943, "eval_max_distance": 22, "eval_mean_distance": 0, "eval_runtime": 26.5483, "eval_samples_per_second": 37.667, "eval_steps_per_second": 2.524, "step": 430276 }, { "epoch": 28.02, "learning_rate": 0.0004885345943327332, "loss": 0.0105, "step": 430521 }, { "epoch": 28.13, "learning_rate": 0.0004860342870364345, "loss": 0.0093, "step": 432250 }, { "epoch": 28.24, "learning_rate": 0.0004835339797401358, "loss": 0.0055, "step": 433979 }, { "epoch": 28.35, "learning_rate": 0.0004810336724438371, "loss": 0.0049, "step": 435708 }, { "epoch": 28.47, "learning_rate": 0.00047853336514753835, "loss": 0.0044, "step": 437437 }, { "epoch": 28.58, "learning_rate": 0.0004760330578512397, "loss": 0.0044, "step": 439166 }, { "epoch": 28.69, "learning_rate": 0.000473532750554941, "loss": 0.0047, "step": 440895 }, { "epoch": 28.8, "learning_rate": 0.0004710324432586423, "loss": 0.0047, "step": 442624 }, { "epoch": 28.92, "learning_rate": 0.00046853213596234356, "loss": 0.0054, "step": 444353 }, { "epoch": 29.0, "eval_loss": 0.04814203828573227, "eval_max_distance": 23, "eval_mean_distance": 0, "eval_runtime": 35.122, "eval_samples_per_second": 28.472, "eval_steps_per_second": 1.908, "step": 445643 }, { "epoch": 29.03, "learning_rate": 0.00046603182866604485, "loss": 0.0103, "step": 446082 }, { "epoch": 29.14, "learning_rate": 0.00046353152136974614, "loss": 0.0076, "step": 447811 }, { "epoch": 29.25, "learning_rate": 0.00046103121407344743, "loss": 0.005, "step": 449540 }, { "epoch": 29.37, "learning_rate": 0.00045853090677714877, "loss": 0.0043, "step": 451269 }, { "epoch": 29.48, "learning_rate": 0.00045603059948085, "loss": 0.0043, "step": 452998 }, { "epoch": 29.59, "learning_rate": 0.00045353029218455135, "loss": 0.004, "step": 454727 }, { "epoch": 29.7, "learning_rate": 0.0004510299848882526, "loss": 0.0042, "step": 456456 }, { "epoch": 29.82, "learning_rate": 6.808721912515757e-06, "loss": 0.0039, "step": 458185 }, { "epoch": 29.93, "learning_rate": 2.641543085351246e-06, "loss": 0.004, "step": 459914 }, { "epoch": 30.0, "eval_loss": 0.04417673125863075, "eval_max_distance": 25, "eval_mean_distance": 0, "eval_runtime": 68.6071, "eval_samples_per_second": 14.576, "eval_steps_per_second": 0.977, "step": 461010 }, { "epoch": 30.0, "step": 461010, "total_flos": 1.1619438964958822e+17, "train_loss": 3.931375028864634e-05, "train_runtime": 411.3459, "train_samples_per_second": 16810.814, "train_steps_per_second": 1120.736 } ], "logging_steps": 1729, "max_steps": 461010, "num_train_epochs": 30, "save_steps": 3458, "total_flos": 1.1619438964958822e+17, "trial_name": null, "trial_params": null }