{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 30.0,
  "eval_steps": 500,
  "global_step": 461010,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 2.8920122621319915e-08,
      "loss": 14.3504,
      "step": 1
    },
    {
      "epoch": 0.15,
      "learning_rate": 5.000289201226213e-05,
      "loss": 2.1001,
      "step": 1729
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.00010000578402452426,
      "loss": 0.3756,
      "step": 3458
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0001500086760367864,
      "loss": 0.2527,
      "step": 5187
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00020001156804904852,
      "loss": 0.2076,
      "step": 6916
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00025001446006131067,
      "loss": 0.1864,
      "step": 8645
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.0003000173520735728,
      "loss": 0.199,
      "step": 10374
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.8173184990882874,
      "eval_max_distance": 167,
      "eval_mean_distance": 17,
      "eval_runtime": 64.0638,
      "eval_samples_per_second": 15.609,
      "eval_steps_per_second": 0.78,
      "step": 11526
    },
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0003500202440858349, |
|
"loss": 0.2481, |
|
"step": 12103 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00040002313609809704, |
|
"loss": 0.1244, |
|
"step": 13832 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0004500260281103592, |
|
"loss": 0.1055, |
|
"step": 15561 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0005000289201226213, |
|
"loss": 0.102, |
|
"step": 17290 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0005500318121348835, |
|
"loss": 0.102, |
|
"step": 19019 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0006000347041471456, |
|
"loss": 0.1083, |
|
"step": 20748 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0006500375961594078, |
|
"loss": 0.1286, |
|
"step": 22477 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5452634692192078, |
|
"eval_max_distance": 158, |
|
"eval_mean_distance": 14, |
|
"eval_runtime": 30.4268, |
|
"eval_samples_per_second": 32.866, |
|
"eval_steps_per_second": 1.643, |
|
"step": 23052 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0007000404881716698, |
|
"loss": 0.1449, |
|
"step": 24206 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.000750043380183932, |
|
"loss": 0.0747, |
|
"step": 25935 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0008000462721961941, |
|
"loss": 0.0744, |
|
"step": 27664 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0008500491642084563, |
|
"loss": 0.0742, |
|
"step": 29393 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0009000520562207184, |
|
"loss": 0.0792, |
|
"step": 31122 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0009500549482329805, |
|
"loss": 0.0891, |
|
"step": 32851 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.3629104495048523, |
|
"eval_max_distance": 122, |
|
"eval_mean_distance": 10, |
|
"eval_runtime": 35.1149, |
|
"eval_samples_per_second": 28.478, |
|
"eval_steps_per_second": 1.424, |
|
"step": 34578 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0009999935733060843, |
|
"loss": 0.1079, |
|
"step": 34580 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.000994437696415833, |
|
"loss": 0.0962, |
|
"step": 36309 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0009888818195255813, |
|
"loss": 0.059, |
|
"step": 38038 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.0009833259426353302, |
|
"loss": 0.0576, |
|
"step": 39767 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0009777700657450789, |
|
"loss": 0.058, |
|
"step": 41496 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.0009722141888548275, |
|
"loss": 0.0611, |
|
"step": 43225 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.0009666583119645761, |
|
"loss": 0.0711, |
|
"step": 44954 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.4011004865169525, |
|
"eval_max_distance": 114, |
|
"eval_mean_distance": 12, |
|
"eval_runtime": 24.9859, |
|
"eval_samples_per_second": 40.022, |
|
"eval_steps_per_second": 2.001, |
|
"step": 46104 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.0009611024350743247, |
|
"loss": 0.0991, |
|
"step": 46683 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0009555465581840734, |
|
"loss": 0.0481, |
|
"step": 48412 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0009499906812938221, |
|
"loss": 0.044, |
|
"step": 50141 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0009444348044035706, |
|
"loss": 0.0435, |
|
"step": 51870 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.0009388789275133194, |
|
"loss": 0.0454, |
|
"step": 53599 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.000933323050623068, |
|
"loss": 0.0483, |
|
"step": 55328 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0009277671737328166, |
|
"loss": 0.0566, |
|
"step": 57057 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.29974234104156494, |
|
"eval_max_distance": 100, |
|
"eval_mean_distance": 7, |
|
"eval_runtime": 25.609, |
|
"eval_samples_per_second": 39.049, |
|
"eval_steps_per_second": 1.952, |
|
"step": 57630 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0009222112968425653, |
|
"loss": 0.0724, |
|
"step": 58786 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.000916655419952314, |
|
"loss": 0.0364, |
|
"step": 60515 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.0009110995430620625, |
|
"loss": 0.035, |
|
"step": 62244 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.0009055436661718113, |
|
"loss": 0.0353, |
|
"step": 63973 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.0008999877892815599, |
|
"loss": 0.0364, |
|
"step": 65702 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.0008944319123913086, |
|
"loss": 0.0402, |
|
"step": 67431 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.15522713959217072, |
|
"eval_max_distance": 75, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 24.8618, |
|
"eval_samples_per_second": 40.222, |
|
"eval_steps_per_second": 2.011, |
|
"step": 69156 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0008888760355010572, |
|
"loss": 0.0506, |
|
"step": 69160 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0008833201586108059, |
|
"loss": 0.0514, |
|
"step": 70889 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0008777642817205546, |
|
"loss": 0.0298, |
|
"step": 72618 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.0008722084048303031, |
|
"loss": 0.0287, |
|
"step": 74347 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.0008666525279400518, |
|
"loss": 0.0289, |
|
"step": 76076 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.0008610966510498005, |
|
"loss": 0.0303, |
|
"step": 77805 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.0008555407741595491, |
|
"loss": 0.0348, |
|
"step": 79534 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.15130603313446045, |
|
"eval_max_distance": 79, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 25.5455, |
|
"eval_samples_per_second": 39.146, |
|
"eval_steps_per_second": 1.957, |
|
"step": 80682 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.0008499848972692977, |
|
"loss": 0.0527, |
|
"step": 81263 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0008444290203790465, |
|
"loss": 0.0293, |
|
"step": 82992 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.000838873143488795, |
|
"loss": 0.0247, |
|
"step": 84721 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.0008333172665985437, |
|
"loss": 0.024, |
|
"step": 86450 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.0008277613897082924, |
|
"loss": 0.0243, |
|
"step": 88179 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.000822205512818041, |
|
"loss": 0.0263, |
|
"step": 89908 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.0008166496359277897, |
|
"loss": 0.0302, |
|
"step": 91637 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.14522188901901245, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 25.6271, |
|
"eval_samples_per_second": 39.021, |
|
"eval_steps_per_second": 1.951, |
|
"step": 92208 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.0008110937590375384, |
|
"loss": 0.0445, |
|
"step": 93366 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.0008055378821472869, |
|
"loss": 0.0229, |
|
"step": 95095 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.0007999820052570357, |
|
"loss": 0.0207, |
|
"step": 96824 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 0.0007944261283667843, |
|
"loss": 0.0203, |
|
"step": 98553 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.000788870251476533, |
|
"loss": 0.021, |
|
"step": 100282 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 0.0007833143745862816, |
|
"loss": 0.0223, |
|
"step": 102011 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.08658243715763092, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 23.498, |
|
"eval_samples_per_second": 42.557, |
|
"eval_steps_per_second": 2.128, |
|
"step": 103734 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0007777584976960303, |
|
"loss": 0.0284, |
|
"step": 103740 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 0.0007722026208057789, |
|
"loss": 0.034, |
|
"step": 105469 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 0.0007666467439155275, |
|
"loss": 0.0193, |
|
"step": 107198 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 0.0007610908670252762, |
|
"loss": 0.0175, |
|
"step": 108927 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0007555349901350248, |
|
"loss": 0.0172, |
|
"step": 110656 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.0007499791132447735, |
|
"loss": 0.018, |
|
"step": 112385 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.0007444232363545221, |
|
"loss": 0.0202, |
|
"step": 114114 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.10908353328704834, |
|
"eval_max_distance": 71, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 25.0909, |
|
"eval_samples_per_second": 39.855, |
|
"eval_steps_per_second": 1.993, |
|
"step": 115260 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 0.0007388673594642709, |
|
"loss": 0.0342, |
|
"step": 115843 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 0.0007333114825740194, |
|
"loss": 0.0202, |
|
"step": 117572 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 0.0007277556056837681, |
|
"loss": 0.0159, |
|
"step": 119301 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 0.0007221997287935168, |
|
"loss": 0.0149, |
|
"step": 121030 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 0.0007166438519032654, |
|
"loss": 0.015, |
|
"step": 122759 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 0.000711087975013014, |
|
"loss": 0.0155, |
|
"step": 124488 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 0.0007055320981227628, |
|
"loss": 0.0175, |
|
"step": 126217 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.06553788483142853, |
|
"eval_max_distance": 66, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 22.988, |
|
"eval_samples_per_second": 43.501, |
|
"eval_steps_per_second": 2.175, |
|
"step": 126786 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 0.0006999762212325114, |
|
"loss": 0.0305, |
|
"step": 127946 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 0.00069442034434226, |
|
"loss": 0.0157, |
|
"step": 129675 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 0.0006888644674520087, |
|
"loss": 0.0134, |
|
"step": 131404 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 0.0006833085905617574, |
|
"loss": 0.0124, |
|
"step": 133133 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 0.0006777527136715059, |
|
"loss": 0.0131, |
|
"step": 134862 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 0.0006721968367812547, |
|
"loss": 0.014, |
|
"step": 136591 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.04735955968499184, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 22.5236, |
|
"eval_samples_per_second": 44.398, |
|
"eval_steps_per_second": 2.22, |
|
"step": 138312 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.0006666409598910033, |
|
"loss": 0.0182, |
|
"step": 138320 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 0.000661085083000752, |
|
"loss": 0.025, |
|
"step": 140049 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 0.0006555292061105006, |
|
"loss": 0.0128, |
|
"step": 141778 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 0.0006499733292202492, |
|
"loss": 0.0114, |
|
"step": 143507 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 0.0006444174523299979, |
|
"loss": 0.011, |
|
"step": 145236 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 0.0006388615754397465, |
|
"loss": 0.0117, |
|
"step": 146965 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 0.0006333056985494952, |
|
"loss": 0.0122, |
|
"step": 148694 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.05152251571416855, |
|
"eval_max_distance": 42, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 23.898, |
|
"eval_samples_per_second": 41.845, |
|
"eval_steps_per_second": 2.092, |
|
"step": 149838 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 0.0006277498216592438, |
|
"loss": 0.0241, |
|
"step": 150423 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 0.0006221939447689925, |
|
"loss": 0.0148, |
|
"step": 152152 |
|
}, |
|
{ |
|
"epoch": 13.35, |
|
"learning_rate": 0.0006166380678787411, |
|
"loss": 0.0106, |
|
"step": 153881 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 0.0006110821909884899, |
|
"loss": 0.0096, |
|
"step": 155610 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 0.0006055263140982384, |
|
"loss": 0.0098, |
|
"step": 157339 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 0.0005999704372079872, |
|
"loss": 0.0104, |
|
"step": 159068 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 0.0005944145603177358, |
|
"loss": 0.0117, |
|
"step": 160797 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.047906968742609024, |
|
"eval_max_distance": 30, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 23.3531, |
|
"eval_samples_per_second": 42.821, |
|
"eval_steps_per_second": 2.141, |
|
"step": 161364 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 0.0005888586834274844, |
|
"loss": 0.0224, |
|
"step": 162526 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 0.0005833028065372331, |
|
"loss": 0.0111, |
|
"step": 164255 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 0.0005777469296469818, |
|
"loss": 0.009, |
|
"step": 165984 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 0.0005721910527567303, |
|
"loss": 0.0086, |
|
"step": 167713 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 0.000566635175866479, |
|
"loss": 0.0088, |
|
"step": 169442 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 0.0005610792989762277, |
|
"loss": 0.0093, |
|
"step": 171171 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.05651288107037544, |
|
"eval_max_distance": 56, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 23.6545, |
|
"eval_samples_per_second": 42.275, |
|
"eval_steps_per_second": 2.114, |
|
"step": 172890 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0005555234220859762, |
|
"loss": 0.0124, |
|
"step": 172900 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 0.000549967545195725, |
|
"loss": 0.0181, |
|
"step": 174629 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 0.0005444116683054736, |
|
"loss": 0.0091, |
|
"step": 176358 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 0.0005388557914152222, |
|
"loss": 0.0075, |
|
"step": 178087 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 0.0005332999145249709, |
|
"loss": 0.0075, |
|
"step": 179816 |
|
}, |
|
{ |
|
"epoch": 15.75, |
|
"learning_rate": 0.0005277440376347196, |
|
"loss": 0.0075, |
|
"step": 181545 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 0.0005221881607444683, |
|
"loss": 0.0085, |
|
"step": 183274 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.047154366970062256, |
|
"eval_max_distance": 34, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 23.554, |
|
"eval_samples_per_second": 42.456, |
|
"eval_steps_per_second": 2.123, |
|
"step": 184416 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"learning_rate": 0.0005166322838542169, |
|
"loss": 0.0169, |
|
"step": 185003 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 0.0005110764069639655, |
|
"loss": 0.0104, |
|
"step": 186732 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"learning_rate": 0.0005055205300737143, |
|
"loss": 0.0072, |
|
"step": 188461 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 0.0004999646531834628, |
|
"loss": 0.0068, |
|
"step": 190190 |
|
}, |
|
{ |
|
"epoch": 16.65, |
|
"learning_rate": 0.0004944087762932115, |
|
"loss": 0.0064, |
|
"step": 191919 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 0.0004888528994029601, |
|
"loss": 0.0068, |
|
"step": 193648 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 0.0004832970225127088, |
|
"loss": 0.0075, |
|
"step": 195377 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.04200902581214905, |
|
"eval_max_distance": 28, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 22.1607, |
|
"eval_samples_per_second": 45.125, |
|
"eval_steps_per_second": 2.256, |
|
"step": 195942 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 0.00047774114562245746, |
|
"loss": 0.0162, |
|
"step": 197106 |
|
}, |
|
{ |
|
"epoch": 17.25, |
|
"learning_rate": 0.00047218526873220605, |
|
"loss": 0.0077, |
|
"step": 198835 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"learning_rate": 0.00046662939184195475, |
|
"loss": 0.0063, |
|
"step": 200564 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 0.0004610735149517034, |
|
"loss": 0.0058, |
|
"step": 202293 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 0.0004555176380614521, |
|
"loss": 0.0057, |
|
"step": 204022 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 0.0004499617611712007, |
|
"loss": 0.0059, |
|
"step": 205751 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.04149915650486946, |
|
"eval_max_distance": 32, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 22.9895, |
|
"eval_samples_per_second": 43.498, |
|
"eval_steps_per_second": 2.175, |
|
"step": 207468 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.00044440588428094934, |
|
"loss": 0.0082, |
|
"step": 207480 |
|
}, |
|
{ |
|
"epoch": 18.15, |
|
"learning_rate": 0.00043885000739069804, |
|
"loss": 0.0133, |
|
"step": 209209 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 0.00043329413050044663, |
|
"loss": 0.0063, |
|
"step": 210938 |
|
}, |
|
{ |
|
"epoch": 18.45, |
|
"learning_rate": 0.0004277382536101953, |
|
"loss": 0.0051, |
|
"step": 212667 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 0.000422182376719944, |
|
"loss": 0.0051, |
|
"step": 214396 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 0.00041662649982969263, |
|
"loss": 0.0051, |
|
"step": 216125 |
|
}, |
|
{ |
|
"epoch": 18.9, |
|
"learning_rate": 0.0004110706229394413, |
|
"loss": 0.0054, |
|
"step": 217854 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.0405677855014801, |
|
"eval_max_distance": 28, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 21.777, |
|
"eval_samples_per_second": 45.92, |
|
"eval_steps_per_second": 2.296, |
|
"step": 218994 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 0.0004055147460491899, |
|
"loss": 0.0117, |
|
"step": 219583 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.00039995886915893857, |
|
"loss": 0.0075, |
|
"step": 221312 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 0.00039440299226868727, |
|
"loss": 0.0051, |
|
"step": 223041 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"learning_rate": 0.00038884711537843586, |
|
"loss": 0.0046, |
|
"step": 224770 |
|
}, |
|
{ |
|
"epoch": 19.65, |
|
"learning_rate": 0.0003832912384881845, |
|
"loss": 0.0043, |
|
"step": 226499 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"learning_rate": 0.0003777353615979332, |
|
"loss": 0.0044, |
|
"step": 228228 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"learning_rate": 0.0003721794847076818, |
|
"loss": 0.0046, |
|
"step": 229957 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.03926468640565872, |
|
"eval_max_distance": 24, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 23.1294, |
|
"eval_samples_per_second": 43.235, |
|
"eval_steps_per_second": 2.162, |
|
"step": 230520 |
|
}, |
|
{ |
|
"epoch": 20.1, |
|
"learning_rate": 0.0003666236078174305, |
|
"loss": 0.0118, |
|
"step": 231686 |
|
}, |
|
{ |
|
"epoch": 20.25, |
|
"learning_rate": 0.00036106773092717915, |
|
"loss": 0.0056, |
|
"step": 233415 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 0.0003555118540369278, |
|
"loss": 0.0043, |
|
"step": 235144 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"learning_rate": 0.00034995597714667645, |
|
"loss": 0.0039, |
|
"step": 236873 |
|
}, |
|
{ |
|
"epoch": 20.7, |
|
"learning_rate": 0.0003444001002564251, |
|
"loss": 0.0037, |
|
"step": 238602 |
|
}, |
|
{ |
|
"epoch": 20.85, |
|
"learning_rate": 0.00033884422336617374, |
|
"loss": 0.004, |
|
"step": 240331 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.04168349876999855, |
|
"eval_max_distance": 24, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 22.786, |
|
"eval_samples_per_second": 43.887, |
|
"eval_steps_per_second": 2.194, |
|
"step": 242046 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 0.0003332883464759224, |
|
"loss": 0.0053, |
|
"step": 242060 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"learning_rate": 0.00032773246958567103, |
|
"loss": 0.0097, |
|
"step": 243789 |
|
}, |
|
{ |
|
"epoch": 21.3, |
|
"learning_rate": 0.00032217659269541973, |
|
"loss": 0.0044, |
|
"step": 245518 |
|
}, |
|
{ |
|
"epoch": 21.45, |
|
"learning_rate": 0.0003166207158051684, |
|
"loss": 0.0036, |
|
"step": 247247 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 0.000311064838914917, |
|
"loss": 0.0031, |
|
"step": 248976 |
|
}, |
|
{ |
|
"epoch": 21.75, |
|
"learning_rate": 0.0003055089620246657, |
|
"loss": 0.0031, |
|
"step": 250705 |
|
}, |
|
{ |
|
"epoch": 21.9, |
|
"learning_rate": 0.0002999530851344143, |
|
"loss": 0.0034, |
|
"step": 252434 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.040287140756845474, |
|
"eval_max_distance": 18, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 21.6664, |
|
"eval_samples_per_second": 46.155, |
|
"eval_steps_per_second": 2.308, |
|
"step": 253572 |
|
}, |
|
{ |
|
"epoch": 22.05, |
|
"learning_rate": 0.0002943972082441629, |
|
"loss": 0.0082, |
|
"step": 254163 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"learning_rate": 0.0002888413313539116, |
|
"loss": 0.0053, |
|
"step": 255892 |
|
}, |
|
{ |
|
"epoch": 22.35, |
|
"learning_rate": 0.00028328545446366026, |
|
"loss": 0.0034, |
|
"step": 257621 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"learning_rate": 0.00027772957757340896, |
|
"loss": 0.0031, |
|
"step": 259350 |
|
}, |
|
{ |
|
"epoch": 22.65, |
|
"learning_rate": 0.00027217370068315756, |
|
"loss": 0.0029, |
|
"step": 261079 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"learning_rate": 0.0002666178237929062, |
|
"loss": 0.0027, |
|
"step": 262808 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"learning_rate": 0.0002610619469026549, |
|
"loss": 0.0029, |
|
"step": 264537 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 0.04222797229886055, |
|
"eval_max_distance": 21, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 22.874, |
|
"eval_samples_per_second": 43.718, |
|
"eval_steps_per_second": 2.186, |
|
"step": 265098 |
|
}, |
|
{ |
|
"epoch": 23.1, |
|
"learning_rate": 0.0002555060700124035, |
|
"loss": 0.0081, |
|
"step": 266266 |
|
}, |
|
{ |
|
"epoch": 23.25, |
|
"learning_rate": 0.00024995019312215214, |
|
"loss": 0.0038, |
|
"step": 267995 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"learning_rate": 0.00024439431623190085, |
|
"loss": 0.0028, |
|
"step": 269724 |
|
}, |
|
{ |
|
"epoch": 23.55, |
|
"learning_rate": 0.00023883843934164947, |
|
"loss": 0.0025, |
|
"step": 271453 |
|
}, |
|
{ |
|
"epoch": 23.7, |
|
"learning_rate": 0.00023328256245139814, |
|
"loss": 0.0026, |
|
"step": 273182 |
|
}, |
|
{ |
|
"epoch": 23.85, |
|
"learning_rate": 0.0002277266855611468, |
|
"loss": 0.0024, |
|
"step": 274911 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.04101773351430893, |
|
"eval_max_distance": 21, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 22.8008, |
|
"eval_samples_per_second": 43.858, |
|
"eval_steps_per_second": 2.193, |
|
"step": 276624 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.00022217080867089543, |
|
"loss": 0.0033, |
|
"step": 276640 |
|
}, |
|
{ |
|
"epoch": 24.15, |
|
"learning_rate": 0.00021661493178064408, |
|
"loss": 0.0069, |
|
"step": 278369 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"learning_rate": 0.00021105905489039273, |
|
"loss": 0.0029, |
|
"step": 280098 |
|
}, |
|
{ |
|
"epoch": 24.45, |
|
"learning_rate": 0.0002055031780001414, |
|
"loss": 0.0024, |
|
"step": 281827 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"learning_rate": 0.00019994730110989005, |
|
"loss": 0.0021, |
|
"step": 283556 |
|
}, |
|
{ |
|
"epoch": 24.75, |
|
"learning_rate": 0.0001943914242196387, |
|
"loss": 0.002, |
|
"step": 285285 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"learning_rate": 0.00018883554732938734, |
|
"loss": 0.002, |
|
"step": 287014 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.043479613959789276, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 21.481, |
|
"eval_samples_per_second": 46.553, |
|
"eval_steps_per_second": 2.328, |
|
"step": 288150 |
|
}, |
|
{ |
|
"epoch": 25.05, |
|
"learning_rate": 0.000183279670439136, |
|
"loss": 0.0055, |
|
"step": 288743 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 0.00017772379354888466, |
|
"loss": 0.0038, |
|
"step": 290472 |
|
}, |
|
{ |
|
"epoch": 25.35, |
|
"learning_rate": 0.00017216791665863328, |
|
"loss": 0.0023, |
|
"step": 292201 |
|
}, |
|
{ |
|
"epoch": 25.5, |
|
"learning_rate": 0.00016661203976838196, |
|
"loss": 0.002, |
|
"step": 293930 |
|
}, |
|
{ |
|
"epoch": 25.65, |
|
"learning_rate": 0.0001610561628781306, |
|
"loss": 0.0017, |
|
"step": 295659 |
|
}, |
|
{ |
|
"epoch": 25.8, |
|
"learning_rate": 0.00015550028598787928, |
|
"loss": 0.0018, |
|
"step": 297388 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"learning_rate": 0.0001499444090976279, |
|
"loss": 0.0016, |
|
"step": 299117 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.04517431557178497, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 22.7192, |
|
"eval_samples_per_second": 44.016, |
|
"eval_steps_per_second": 2.201, |
|
"step": 299676 |
|
}, |
|
{ |
|
"epoch": 26.1, |
|
"learning_rate": 0.00014438853220737654, |
|
"loss": 0.0054, |
|
"step": 300846 |
|
}, |
|
{ |
|
"epoch": 26.25, |
|
"learning_rate": 0.00013883265531712522, |
|
"loss": 0.0026, |
|
"step": 302575 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 0.00013327677842687387, |
|
"loss": 0.0018, |
|
"step": 304304 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"learning_rate": 0.0001277209015366225, |
|
"loss": 0.0016, |
|
"step": 306033 |
|
}, |
|
{ |
|
"epoch": 26.7, |
|
"learning_rate": 0.00012216502464637116, |
|
"loss": 0.0015, |
|
"step": 307762 |
|
}, |
|
{ |
|
"epoch": 26.85, |
|
"learning_rate": 0.00011660914775611982, |
|
"loss": 0.0013, |
|
"step": 309491 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 0.04144546017050743, |
|
"eval_max_distance": 14, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 22.686, |
|
"eval_samples_per_second": 44.08, |
|
"eval_steps_per_second": 2.204, |
|
"step": 311202 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 0.00011105327086586847, |
|
"loss": 0.0021, |
|
"step": 311220 |
|
}, |
|
{ |
|
"epoch": 27.15, |
|
"learning_rate": 0.00010549739397561713, |
|
"loss": 0.0046, |
|
"step": 312949 |
|
}, |
|
{ |
|
"epoch": 27.3, |
|
"learning_rate": 9.994151708536577e-05, |
|
"loss": 0.002, |
|
"step": 314678 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 9.438564019511443e-05, |
|
"loss": 0.0016, |
|
"step": 316407 |
|
}, |
|
{ |
|
"epoch": 27.6, |
|
"learning_rate": 8.882976330486308e-05, |
|
"loss": 0.0013, |
|
"step": 318136 |
|
}, |
|
{ |
|
"epoch": 27.75, |
|
"learning_rate": 8.327388641461173e-05, |
|
"loss": 0.0013, |
|
"step": 319865 |
|
}, |
|
{ |
|
"epoch": 27.9, |
|
"learning_rate": 7.771800952436038e-05, |
|
"loss": 0.0012, |
|
"step": 321594 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.04387975484132767, |
|
"eval_max_distance": 14, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 21.586, |
|
"eval_samples_per_second": 46.326, |
|
"eval_steps_per_second": 2.316, |
|
"step": 322728 |
|
}, |
|
{ |
|
"epoch": 28.05, |
|
"learning_rate": 7.216213263410904e-05, |
|
"loss": 0.0037, |
|
"step": 323323 |
|
}, |
|
{ |
|
"epoch": 28.2, |
|
"learning_rate": 6.660625574385768e-05, |
|
"loss": 0.0027, |
|
"step": 325052 |
|
}, |
|
{ |
|
"epoch": 28.35, |
|
"learning_rate": 6.105037885360634e-05, |
|
"loss": 0.0016, |
|
"step": 326781 |
|
}, |
|
{ |
|
"epoch": 28.5, |
|
"learning_rate": 5.5494501963355e-05, |
|
"loss": 0.0013, |
|
"step": 328510 |
|
}, |
|
{ |
|
"epoch": 28.65, |
|
"learning_rate": 4.9938625073103644e-05, |
|
"loss": 0.0011, |
|
"step": 330239 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 4.43827481828523e-05, |
|
"loss": 0.001, |
|
"step": 331968 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"learning_rate": 3.882687129260095e-05, |
|
"loss": 0.001, |
|
"step": 333697 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.044395141303539276, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 22.6289, |
|
"eval_samples_per_second": 44.191, |
|
"eval_steps_per_second": 2.21, |
|
"step": 334254 |
|
}, |
|
{ |
|
"epoch": 29.1, |
|
"learning_rate": 3.32709944023496e-05, |
|
"loss": 0.0039, |
|
"step": 335426 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"learning_rate": 2.771511751209825e-05, |
|
"loss": 0.0017, |
|
"step": 337155 |
|
}, |
|
{ |
|
"epoch": 29.4, |
|
"learning_rate": 2.2159240621846906e-05, |
|
"loss": 0.0012, |
|
"step": 338884 |
|
}, |
|
{ |
|
"epoch": 29.55, |
|
"learning_rate": 1.6603363731595556e-05, |
|
"loss": 0.0011, |
|
"step": 340613 |
|
}, |
|
{ |
|
"epoch": 29.7, |
|
"learning_rate": 1.1047486841344207e-05, |
|
"loss": 0.001, |
|
"step": 342342 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"learning_rate": 0.0004477394104151002, |
|
"loss": 0.0026, |
|
"step": 344071 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.042660146951675415, |
|
"eval_max_distance": 19, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 48.9711, |
|
"eval_samples_per_second": 20.42, |
|
"eval_steps_per_second": 1.021, |
|
"step": 345780 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 0.00044440588428094934, |
|
"loss": 0.0045, |
|
"step": 345800 |
|
}, |
|
{ |
|
"epoch": 30.15, |
|
"learning_rate": 0.00044107235814679855, |
|
"loss": 0.0068, |
|
"step": 347529 |
|
}, |
|
{ |
|
"epoch": 30.3, |
|
"learning_rate": 0.00043773883201264776, |
|
"loss": 0.0036, |
|
"step": 349258 |
|
}, |
|
{ |
|
"epoch": 30.45, |
|
"learning_rate": 0.0004344053058784969, |
|
"loss": 0.0032, |
|
"step": 350987 |
|
}, |
|
{ |
|
"epoch": 30.6, |
|
"learning_rate": 0.0004310717797443461, |
|
"loss": 0.0033, |
|
"step": 352716 |
|
}, |
|
{ |
|
"epoch": 30.75, |
|
"learning_rate": 0.0004277382536101953, |
|
"loss": 0.0034, |
|
"step": 354445 |
|
}, |
|
{ |
|
"epoch": 30.9, |
|
"learning_rate": 0.00042440472747604454, |
|
"loss": 0.004, |
|
"step": 356174 |
|
}, |
|
{ |
|
"epoch": 23.29, |
|
"learning_rate": 0.000593547500777279, |
|
"loss": 0.0057, |
|
"step": 357903 |
|
}, |
|
{ |
|
"epoch": 23.4, |
|
"learning_rate": 0.0005910471934809801, |
|
"loss": 0.0053, |
|
"step": 359632 |
|
}, |
|
{ |
|
"epoch": 23.52, |
|
"learning_rate": 0.0005885468861846814, |
|
"loss": 0.0055, |
|
"step": 361361 |
|
}, |
|
{ |
|
"epoch": 23.63, |
|
"learning_rate": 0.0005860465788883828, |
|
"loss": 0.0056, |
|
"step": 363090 |
|
}, |
|
{ |
|
"epoch": 23.74, |
|
"learning_rate": 0.0005835462715920841, |
|
"loss": 0.006, |
|
"step": 364819 |
|
}, |
|
{ |
|
"epoch": 23.85, |
|
"learning_rate": 0.0005810459642957853, |
|
"loss": 0.0064, |
|
"step": 366548 |
|
}, |
|
{ |
|
"epoch": 23.97, |
|
"learning_rate": 0.0005785456569994867, |
|
"loss": 0.0077, |
|
"step": 368277 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.049542564898729324, |
|
"eval_max_distance": 27, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 72.4399, |
|
"eval_samples_per_second": 13.805, |
|
"eval_steps_per_second": 0.925, |
|
"step": 368808 |
|
}, |
|
{ |
|
"epoch": 24.08, |
|
"learning_rate": 0.0005760453497031879, |
|
"loss": 0.0158, |
|
"step": 370006 |
|
}, |
|
{ |
|
"epoch": 24.19, |
|
"learning_rate": 0.0005735450424068892, |
|
"loss": 0.0082, |
|
"step": 371735 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"learning_rate": 0.0005710447351105905, |
|
"loss": 0.0057, |
|
"step": 373464 |
|
}, |
|
{ |
|
"epoch": 24.42, |
|
"learning_rate": 0.0005685444278142918, |
|
"loss": 0.0059, |
|
"step": 375193 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 0.0005660441205179932, |
|
"loss": 0.0055, |
|
"step": 376922 |
|
}, |
|
{ |
|
"epoch": 24.64, |
|
"learning_rate": 0.0005635438132216944, |
|
"loss": 0.0062, |
|
"step": 378651 |
|
}, |
|
{ |
|
"epoch": 24.75, |
|
"learning_rate": 0.0005610435059253957, |
|
"loss": 0.0059, |
|
"step": 380380 |
|
}, |
|
{ |
|
"epoch": 24.87, |
|
"learning_rate": 0.000558543198629097, |
|
"loss": 0.0062, |
|
"step": 382109 |
|
}, |
|
{ |
|
"epoch": 24.98, |
|
"learning_rate": 0.0005560428913327982, |
|
"loss": 0.0083, |
|
"step": 383838 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.044624801725149155, |
|
"eval_max_distance": 37, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 35.859, |
|
"eval_samples_per_second": 27.887, |
|
"eval_steps_per_second": 1.868, |
|
"step": 384175 |
|
}, |
|
{ |
|
"epoch": 25.09, |
|
"learning_rate": 0.0005535425840364996, |
|
"loss": 0.0154, |
|
"step": 385567 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 0.0005510422767402009, |
|
"loss": 0.0075, |
|
"step": 387296 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"learning_rate": 0.0005485419694439023, |
|
"loss": 0.006, |
|
"step": 389025 |
|
}, |
|
{ |
|
"epoch": 25.43, |
|
"learning_rate": 0.0005460416621476034, |
|
"loss": 0.0057, |
|
"step": 390754 |
|
}, |
|
{ |
|
"epoch": 25.54, |
|
"learning_rate": 0.0005435413548513047, |
|
"loss": 0.0057, |
|
"step": 392483 |
|
}, |
|
{ |
|
"epoch": 25.65, |
|
"learning_rate": 0.0005410410475550061, |
|
"loss": 0.0056, |
|
"step": 394212 |
|
}, |
|
{ |
|
"epoch": 25.77, |
|
"learning_rate": 0.0005385407402587073, |
|
"loss": 0.0059, |
|
"step": 395941 |
|
}, |
|
{ |
|
"epoch": 25.88, |
|
"learning_rate": 0.0005360404329624087, |
|
"loss": 0.0067, |
|
"step": 397670 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"learning_rate": 0.0005335401256661099, |
|
"loss": 0.0078, |
|
"step": 399399 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.048115409910678864, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 35.8872, |
|
"eval_samples_per_second": 27.865, |
|
"eval_steps_per_second": 1.867, |
|
"step": 399542 |
|
}, |
|
{ |
|
"epoch": 26.1, |
|
"learning_rate": 0.0005310398183698112, |
|
"loss": 0.0151, |
|
"step": 401128 |
|
}, |
|
{ |
|
"epoch": 26.22, |
|
"learning_rate": 0.0005285395110735125, |
|
"loss": 0.0068, |
|
"step": 402857 |
|
}, |
|
{ |
|
"epoch": 26.33, |
|
"learning_rate": 0.0005260392037772138, |
|
"loss": 0.0055, |
|
"step": 404586 |
|
}, |
|
{ |
|
"epoch": 26.44, |
|
"learning_rate": 0.0005235388964809152, |
|
"loss": 0.0052, |
|
"step": 406315 |
|
}, |
|
{ |
|
"epoch": 26.55, |
|
"learning_rate": 0.0005210385891846165, |
|
"loss": 0.0054, |
|
"step": 408044 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 0.0005185382818883176, |
|
"loss": 0.0053, |
|
"step": 409773 |
|
}, |
|
{ |
|
"epoch": 26.78, |
|
"learning_rate": 0.000516037974592019, |
|
"loss": 0.0052, |
|
"step": 411502 |
|
}, |
|
{ |
|
"epoch": 26.89, |
|
"learning_rate": 0.0005135376672957203, |
|
"loss": 0.006, |
|
"step": 413231 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 0.042408570647239685, |
|
"eval_max_distance": 37, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 14.3824, |
|
"eval_samples_per_second": 69.529, |
|
"eval_steps_per_second": 4.658, |
|
"step": 414909 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 0.0005110373599994215, |
|
"loss": 0.0095, |
|
"step": 414960 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 0.0005085370527031229, |
|
"loss": 0.0115, |
|
"step": 416689 |
|
}, |
|
{ |
|
"epoch": 27.23, |
|
"learning_rate": 0.0005060367454068242, |
|
"loss": 0.006, |
|
"step": 418418 |
|
}, |
|
{ |
|
"epoch": 27.34, |
|
"learning_rate": 0.0005035364381105254, |
|
"loss": 0.0051, |
|
"step": 420147 |
|
}, |
|
{ |
|
"epoch": 27.45, |
|
"learning_rate": 0.0005010361308142267, |
|
"loss": 0.0047, |
|
"step": 421876 |
|
}, |
|
{ |
|
"epoch": 27.57, |
|
"learning_rate": 0.000498535823517928, |
|
"loss": 0.0047, |
|
"step": 423605 |
|
}, |
|
{ |
|
"epoch": 27.68, |
|
"learning_rate": 0.0004960355162216294, |
|
"loss": 0.0054, |
|
"step": 425334 |
|
}, |
|
{ |
|
"epoch": 27.79, |
|
"learning_rate": 0.0004935352089253306, |
|
"loss": 0.0052, |
|
"step": 427063 |
|
}, |
|
{ |
|
"epoch": 27.9, |
|
"learning_rate": 0.000491034901629032, |
|
"loss": 0.0056, |
|
"step": 428792 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.04393070191144943, |
|
"eval_max_distance": 22, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 26.5483, |
|
"eval_samples_per_second": 37.667, |
|
"eval_steps_per_second": 2.524, |
|
"step": 430276 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"learning_rate": 0.0004885345943327332, |
|
"loss": 0.0105, |
|
"step": 430521 |
|
}, |
|
{ |
|
"epoch": 28.13, |
|
"learning_rate": 0.0004860342870364345, |
|
"loss": 0.0093, |
|
"step": 432250 |
|
}, |
|
{ |
|
"epoch": 28.24, |
|
"learning_rate": 0.0004835339797401358, |
|
"loss": 0.0055, |
|
"step": 433979 |
|
}, |
|
{ |
|
"epoch": 28.35, |
|
"learning_rate": 0.0004810336724438371, |
|
"loss": 0.0049, |
|
"step": 435708 |
|
}, |
|
{ |
|
"epoch": 28.47, |
|
"learning_rate": 0.00047853336514753835, |
|
"loss": 0.0044, |
|
"step": 437437 |
|
}, |
|
{ |
|
"epoch": 28.58, |
|
"learning_rate": 0.0004760330578512397, |
|
"loss": 0.0044, |
|
"step": 439166 |
|
}, |
|
{ |
|
"epoch": 28.69, |
|
"learning_rate": 0.000473532750554941, |
|
"loss": 0.0047, |
|
"step": 440895 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 0.0004710324432586423, |
|
"loss": 0.0047, |
|
"step": 442624 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"learning_rate": 0.00046853213596234356, |
|
"loss": 0.0054, |
|
"step": 444353 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.04814203828573227, |
|
"eval_max_distance": 23, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 35.122, |
|
"eval_samples_per_second": 28.472, |
|
"eval_steps_per_second": 1.908, |
|
"step": 445643 |
|
}, |
|
{ |
|
"epoch": 29.03, |
|
"learning_rate": 0.00046603182866604485, |
|
"loss": 0.0103, |
|
"step": 446082 |
|
}, |
|
{ |
|
"epoch": 29.14, |
|
"learning_rate": 0.00046353152136974614, |
|
"loss": 0.0076, |
|
"step": 447811 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"learning_rate": 0.00046103121407344743, |
|
"loss": 0.005, |
|
"step": 449540 |
|
}, |
|
{ |
|
"epoch": 29.37, |
|
"learning_rate": 0.00045853090677714877, |
|
"loss": 0.0043, |
|
"step": 451269 |
|
}, |
|
{ |
|
"epoch": 29.48, |
|
"learning_rate": 0.00045603059948085, |
|
"loss": 0.0043, |
|
"step": 452998 |
|
}, |
|
{ |
|
"epoch": 29.59, |
|
"learning_rate": 0.00045353029218455135, |
|
"loss": 0.004, |
|
"step": 454727 |
|
}, |
|
{ |
|
"epoch": 29.7, |
|
"learning_rate": 0.0004510299848882526, |
|
"loss": 0.0042, |
|
"step": 456456 |
|
}, |
|
{ |
|
"epoch": 29.82, |
|
"learning_rate": 6.808721912515757e-06, |
|
"loss": 0.0039, |
|
"step": 458185 |
|
}, |
|
{ |
|
"epoch": 29.93, |
|
"learning_rate": 2.641543085351246e-06, |
|
"loss": 0.004, |
|
"step": 459914 |
|
}, |
|
    {
      "epoch": 30.0,
      "eval_loss": 0.04417673125863075,
      "eval_max_distance": 25,
      "eval_mean_distance": 0,
      "eval_runtime": 68.6071,
      "eval_samples_per_second": 14.576,
      "eval_steps_per_second": 0.977,
      "step": 461010
    },
    {
      "epoch": 30.0,
      "step": 461010,
      "total_flos": 1.1619438964958822e+17,
      "train_loss": 3.931375028864634e-05,
      "train_runtime": 411.3459,
      "train_samples_per_second": 16810.814,
      "train_steps_per_second": 1120.736
    }
  ],
  "logging_steps": 1729,
  "max_steps": 461010,
  "num_train_epochs": 30,
  "save_steps": 3458,
  "total_flos": 1.1619438964958822e+17,
  "trial_name": null,
  "trial_params": null
}