{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 77940, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.2830382345393893e-07, "loss": 13.6904, "step": 1 }, { "epoch": 0.15, "learning_rate": 5.003849114703618e-05, "loss": 5.273, "step": 390 }, { "epoch": 0.3, "learning_rate": 0.00010007698229407236, "loss": 0.4624, "step": 780 }, { "epoch": 0.45, "learning_rate": 0.00015011547344110854, "loss": 0.3258, "step": 1170 }, { "epoch": 0.6, "learning_rate": 0.00020015396458814472, "loss": 0.2553, "step": 1560 }, { "epoch": 0.75, "learning_rate": 0.0002501924557351809, "loss": 0.2143, "step": 1950 }, { "epoch": 0.9, "learning_rate": 0.0003002309468822171, "loss": 0.181, "step": 2340 }, { "epoch": 1.0, "eval_loss": 0.21401263773441315, "eval_max_distance": 36, "eval_mean_distance": 4, "eval_runtime": 0.4225, "eval_samples_per_second": 118.338, "eval_steps_per_second": 4.734, "step": 2598 }, { "epoch": 1.05, "learning_rate": 0.0003502694380292533, "loss": 0.1557, "step": 2730 }, { "epoch": 1.2, "learning_rate": 0.00040030792917628943, "loss": 0.1402, "step": 3120 }, { "epoch": 1.35, "learning_rate": 0.00045034642032332564, "loss": 0.1283, "step": 3510 }, { "epoch": 1.5, "learning_rate": 0.0005003849114703618, "loss": 0.1194, "step": 3900 }, { "epoch": 1.65, "learning_rate": 0.0005504234026173979, "loss": 0.1158, "step": 4290 }, { "epoch": 1.8, "learning_rate": 0.0006004618937644341, "loss": 0.108, "step": 4680 }, { "epoch": 1.95, "learning_rate": 0.0006505003849114704, "loss": 0.1067, "step": 5070 }, { "epoch": 2.0, "eval_loss": 0.14157189428806305, "eval_max_distance": 29, "eval_mean_distance": 2, "eval_runtime": 0.4066, "eval_samples_per_second": 122.96, "eval_steps_per_second": 4.918, "step": 5196 }, { "epoch": 2.1, "learning_rate": 0.0007005388760585066, "loss": 0.0924, "step": 5460 }, { "epoch": 2.25, "learning_rate": 0.0007505773672055427, "loss": 0.0927, "step": 5850 }, { "epoch": 2.4, "learning_rate": 0.0008006158583525789, "loss": 0.0872, "step": 6240 }, { "epoch": 2.55, "learning_rate": 0.0008506543494996151, "loss": 0.0841, "step": 6630 }, { "epoch": 2.7, "learning_rate": 0.0009006928406466513, "loss": 0.0808, "step": 7020 }, { "epoch": 2.85, "learning_rate": 0.0009507313317936874, "loss": 0.0801, "step": 7410 }, { "epoch": 3.0, "eval_loss": 0.10980188101530075, "eval_max_distance": 22, "eval_mean_distance": 2, "eval_runtime": 0.4052, "eval_samples_per_second": 123.401, "eval_steps_per_second": 4.936, "step": 7794 }, { "epoch": 3.0, "learning_rate": 0.0009999144641176974, "loss": 0.0778, "step": 7800 }, { "epoch": 3.15, "learning_rate": 0.0009943546317680268, "loss": 0.0684, "step": 8190 }, { "epoch": 3.3, "learning_rate": 0.000988794799418356, "loss": 0.0692, "step": 8580 }, { "epoch": 3.45, "learning_rate": 0.0009832349670686853, "loss": 0.0629, "step": 8970 }, { "epoch": 3.6, "learning_rate": 0.0009776751347190145, "loss": 0.0653, "step": 9360 }, { "epoch": 3.75, "learning_rate": 0.0009721153023693439, "loss": 0.0595, "step": 9750 }, { "epoch": 3.9, "learning_rate": 0.0009665554700196733, "loss": 0.0575, "step": 10140 }, { "epoch": 4.0, "eval_loss": 0.10807737708091736, "eval_max_distance": 18, "eval_mean_distance": 2, "eval_runtime": 0.3927, "eval_samples_per_second": 127.308, "eval_steps_per_second": 5.092, "step": 10392 }, { "epoch": 4.05, "learning_rate": 0.0009609956376700025, "loss": 0.1012, "step": 10530 }, { "epoch": 4.2, "learning_rate": 0.0009554358053203319, "loss": 0.0526, "step": 10920 }, { "epoch": 4.35, "learning_rate": 0.0009498759729706612, "loss": 0.0496, "step": 11310 }, { "epoch": 4.5, "learning_rate": 0.0009443161406209905, "loss": 0.0492, "step": 11700 }, { "epoch": 4.65, "learning_rate": 0.0009387563082713198, "loss": 0.0483, "step": 12090 }, { "epoch": 4.8, "learning_rate": 0.0009331964759216492, "loss": 0.0469, "step": 12480 }, { "epoch": 4.95, "learning_rate": 0.0009276366435719784, "loss": 0.0452, "step": 12870 }, { "epoch": 5.0, "eval_loss": 0.08966636657714844, "eval_max_distance": 14, "eval_mean_distance": 1, "eval_runtime": 0.3879, "eval_samples_per_second": 128.906, "eval_steps_per_second": 5.156, "step": 12990 }, { "epoch": 5.1, "learning_rate": 0.0009220768112223078, "loss": 0.0392, "step": 13260 }, { "epoch": 5.25, "learning_rate": 0.0009165169788726371, "loss": 0.0397, "step": 13650 }, { "epoch": 5.4, "learning_rate": 0.0009109571465229664, "loss": 0.0393, "step": 14040 }, { "epoch": 5.55, "learning_rate": 0.0009053973141732957, "loss": 0.0399, "step": 14430 }, { "epoch": 5.7, "learning_rate": 0.0008998374818236251, "loss": 0.039, "step": 14820 }, { "epoch": 5.85, "learning_rate": 0.0008942776494739543, "loss": 0.0372, "step": 15210 }, { "epoch": 6.0, "eval_loss": 0.07197271287441254, "eval_max_distance": 15, "eval_mean_distance": 1, "eval_runtime": 0.3764, "eval_samples_per_second": 132.832, "eval_steps_per_second": 5.313, "step": 15588 }, { "epoch": 6.0, "learning_rate": 0.0008887178171242837, "loss": 0.039, "step": 15600 }, { "epoch": 6.15, "learning_rate": 0.000883157984774613, "loss": 0.0312, "step": 15990 }, { "epoch": 6.3, "learning_rate": 0.0008775981524249422, "loss": 0.0337, "step": 16380 }, { "epoch": 6.45, "learning_rate": 0.0008720383200752716, "loss": 0.0328, "step": 16770 }, { "epoch": 6.61, "learning_rate": 0.0008664784877256009, "loss": 0.0327, "step": 17160 }, { "epoch": 6.76, "learning_rate": 0.0008609186553759302, "loss": 0.0305, "step": 17550 }, { "epoch": 6.91, "learning_rate": 0.0008553588230262595, "loss": 0.0323, "step": 17940 }, { "epoch": 7.0, "eval_loss": 0.08398188650608063, "eval_max_distance": 12, "eval_mean_distance": 1, "eval_runtime": 0.3765, "eval_samples_per_second": 132.8, "eval_steps_per_second": 5.312, "step": 18186 }, { "epoch": 7.06, "learning_rate": 0.0008497989906765889, "loss": 0.0286, "step": 18330 }, { "epoch": 7.21, "learning_rate": 0.0008442391583269181, "loss": 0.0263, "step": 18720 }, { "epoch": 7.36, "learning_rate": 0.0008386793259772475, "loss": 0.0269, "step": 19110 }, { "epoch": 7.51, "learning_rate": 0.0008331194936275768, "loss": 0.0268, "step": 19500 }, { "epoch": 7.66, "learning_rate": 0.0008275596612779061, "loss": 0.0283, "step": 19890 }, { "epoch": 7.81, "learning_rate": 0.0008219998289282354, "loss": 0.0274, "step": 20280 }, { "epoch": 7.96, "learning_rate": 0.0008164399965785648, "loss": 0.0267, "step": 20670 }, { "epoch": 8.0, "eval_loss": 0.07682657241821289, "eval_max_distance": 16, "eval_mean_distance": 1, "eval_runtime": 0.3687, "eval_samples_per_second": 135.62, "eval_steps_per_second": 5.425, "step": 20784 }, { "epoch": 8.11, "learning_rate": 0.000810880164228894, "loss": 0.0235, "step": 21060 }, { "epoch": 8.26, "learning_rate": 0.0008053203318792234, "loss": 0.0221, "step": 21450 }, { "epoch": 8.41, "learning_rate": 0.0007997604995295527, "loss": 0.0221, "step": 21840 }, { "epoch": 8.56, "learning_rate": 0.0007942006671798819, "loss": 0.0223, "step": 22230 }, { "epoch": 8.71, "learning_rate": 0.0007886408348302113, "loss": 0.0233, "step": 22620 }, { "epoch": 8.86, "learning_rate": 0.0007830810024805405, "loss": 0.0231, "step": 23010 }, { "epoch": 9.0, "eval_loss": 0.06973634660243988, "eval_max_distance": 10, "eval_mean_distance": 1, "eval_runtime": 0.3759, "eval_samples_per_second": 133.026, "eval_steps_per_second": 5.321, "step": 23382 }, { "epoch": 9.01, "learning_rate": 0.0007775211701308699, "loss": 0.0227, "step": 23400 }, { "epoch": 9.16, "learning_rate": 0.0007719613377811992, "loss": 0.0185, "step": 23790 }, { "epoch": 9.31, "learning_rate": 0.0007664015054315285, "loss": 0.0183, "step": 24180 }, { "epoch": 9.46, "learning_rate": 0.0007608416730818578, "loss": 0.0191, "step": 24570 }, { "epoch": 9.61, "learning_rate": 0.0007552818407321872, "loss": 0.019, "step": 24960 }, { "epoch": 9.76, "learning_rate": 0.0007497220083825164, "loss": 0.0193, "step": 25350 }, { "epoch": 9.91, "learning_rate": 0.0007441621760328458, "loss": 0.0199, "step": 25740 }, { "epoch": 10.0, "eval_loss": 0.07169829308986664, "eval_max_distance": 9, "eval_mean_distance": 1, "eval_runtime": 0.3704, "eval_samples_per_second": 134.993, "eval_steps_per_second": 5.4, "step": 25980 }, { "epoch": 10.06, "learning_rate": 0.0007386023436831751, "loss": 0.0184, "step": 26130 }, { "epoch": 10.21, "learning_rate": 0.0007330425113335044, "loss": 0.016, "step": 26520 }, { "epoch": 10.36, "learning_rate": 0.0007274826789838337, "loss": 0.0164, "step": 26910 }, { "epoch": 10.51, "learning_rate": 0.0007219228466341631, "loss": 0.016, "step": 27300 }, { "epoch": 10.66, "learning_rate": 0.0007163630142844923, "loss": 0.0169, "step": 27690 }, { "epoch": 10.81, "learning_rate": 0.0007108031819348217, "loss": 0.0165, "step": 28080 }, { "epoch": 10.96, "learning_rate": 0.000705243349585151, "loss": 0.0168, "step": 28470 }, { "epoch": 11.0, "eval_loss": 0.08123478293418884, "eval_max_distance": 16, "eval_mean_distance": 1, "eval_runtime": 0.3865, "eval_samples_per_second": 129.356, "eval_steps_per_second": 5.174, "step": 28578 }, { "epoch": 11.11, "learning_rate": 0.0006996835172354803, "loss": 0.015, "step": 28860 }, { "epoch": 11.26, "learning_rate": 0.0006941236848858096, "loss": 0.0137, "step": 29250 }, { "epoch": 11.41, "learning_rate": 0.0006885638525361389, "loss": 0.0151, "step": 29640 }, { "epoch": 11.56, "learning_rate": 0.0006830040201864682, "loss": 0.0144, "step": 30030 }, { "epoch": 11.71, "learning_rate": 0.0006774441878367975, "loss": 0.0147, "step": 30420 }, { "epoch": 11.86, "learning_rate": 0.0006718843554871269, "loss": 0.0148, "step": 30810 }, { "epoch": 12.0, "eval_loss": 0.09610763192176819, "eval_max_distance": 12, "eval_mean_distance": 1, "eval_runtime": 0.3633, "eval_samples_per_second": 137.639, "eval_steps_per_second": 5.506, "step": 31176 }, { "epoch": 12.01, "learning_rate": 0.0006663245231374561, "loss": 0.0145, "step": 31200 }, { "epoch": 12.16, "learning_rate": 0.0006607646907877855, "loss": 0.0124, "step": 31590 }, { "epoch": 12.31, "learning_rate": 0.0006552048584381148, "loss": 0.0117, "step": 31980 }, { "epoch": 12.46, "learning_rate": 0.0006496450260884441, "loss": 0.0121, "step": 32370 }, { "epoch": 12.61, "learning_rate": 0.0006440851937387734, "loss": 0.0124, "step": 32760 }, { "epoch": 12.76, "learning_rate": 0.0006385253613891028, "loss": 0.0125, "step": 33150 }, { "epoch": 12.91, "learning_rate": 0.000632965529039432, "loss": 0.0128, "step": 33540 }, { "epoch": 13.0, "eval_loss": 0.08225859701633453, "eval_max_distance": 9, "eval_mean_distance": 1, "eval_runtime": 0.3712, "eval_samples_per_second": 134.695, "eval_steps_per_second": 5.388, "step": 33774 }, { "epoch": 13.06, "learning_rate": 0.0006274056966897614, "loss": 0.0116, "step": 33930 }, { "epoch": 13.21, "learning_rate": 0.0006218458643400907, "loss": 0.0106, "step": 34320 }, { "epoch": 13.36, "learning_rate": 0.00061628603199042, "loss": 0.0104, "step": 34710 }, { "epoch": 13.51, "learning_rate": 0.0006107261996407493, "loss": 0.011, "step": 35100 }, { "epoch": 13.66, "learning_rate": 0.0006051663672910787, "loss": 0.0108, "step": 35490 }, { "epoch": 13.81, "learning_rate": 0.0005996065349414079, "loss": 0.0111, "step": 35880 }, { "epoch": 13.96, "learning_rate": 0.0005940467025917372, "loss": 0.0112, "step": 36270 }, { "epoch": 14.0, "eval_loss": 0.07655028253793716, "eval_max_distance": 12, "eval_mean_distance": 1, "eval_runtime": 0.361, "eval_samples_per_second": 138.506, "eval_steps_per_second": 5.54, "step": 36372 }, { "epoch": 14.11, "learning_rate": 0.0005884868702420666, "loss": 0.0098, "step": 36660 }, { "epoch": 14.26, "learning_rate": 0.0005829270378923958, "loss": 0.009, "step": 37050 }, { "epoch": 14.41, "learning_rate": 0.0005773672055427252, "loss": 0.0093, "step": 37440 }, { "epoch": 14.56, "learning_rate": 0.0005718073731930545, "loss": 0.0095, "step": 37830 }, { "epoch": 14.71, "learning_rate": 0.0005662475408433838, "loss": 0.0093, "step": 38220 }, { "epoch": 14.86, "learning_rate": 0.0005606877084937131, "loss": 0.0093, "step": 38610 }, { "epoch": 15.0, "eval_loss": 0.07127052545547485, "eval_max_distance": 9, "eval_mean_distance": 1, "eval_runtime": 0.3671, "eval_samples_per_second": 136.219, "eval_steps_per_second": 5.449, "step": 38970 }, { "epoch": 15.01, "learning_rate": 0.0005551278761440425, "loss": 0.0101, "step": 39000 }, { "epoch": 15.16, "learning_rate": 0.0005495680437943717, "loss": 0.0078, "step": 39390 }, { "epoch": 15.31, "learning_rate": 0.0005440082114447011, "loss": 0.0079, "step": 39780 }, { "epoch": 15.46, "learning_rate": 0.0005384483790950304, "loss": 0.0081, "step": 40170 }, { "epoch": 15.61, "learning_rate": 0.0005328885467453597, "loss": 0.0085, "step": 40560 }, { "epoch": 15.76, "learning_rate": 0.000527328714395689, "loss": 0.0088, "step": 40950 }, { "epoch": 15.91, "learning_rate": 0.0005217688820460184, "loss": 0.0083, "step": 41340 }, { "epoch": 16.0, "eval_loss": 0.08469703793525696, "eval_max_distance": 14, "eval_mean_distance": 1, "eval_runtime": 0.3815, "eval_samples_per_second": 131.073, "eval_steps_per_second": 5.243, "step": 41568 }, { "epoch": 16.06, "learning_rate": 0.0005162090496963476, "loss": 0.0081, "step": 41730 }, { "epoch": 16.21, "learning_rate": 0.000510649217346677, "loss": 0.0069, "step": 42120 }, { "epoch": 16.36, "learning_rate": 0.0005050893849970063, "loss": 0.007, "step": 42510 }, { "epoch": 16.51, "learning_rate": 0.0004995295526473355, "loss": 0.0071, "step": 42900 }, { "epoch": 16.66, "learning_rate": 0.0004939697202976649, "loss": 0.0073, "step": 43290 }, { "epoch": 16.81, "learning_rate": 0.0004884098879479942, "loss": 0.0076, "step": 43680 }, { "epoch": 16.96, "learning_rate": 0.0004828500555983235, "loss": 0.0076, "step": 44070 }, { "epoch": 17.0, "eval_loss": 0.08625645935535431, "eval_max_distance": 11, "eval_mean_distance": 1, "eval_runtime": 0.3551, "eval_samples_per_second": 140.8, "eval_steps_per_second": 5.632, "step": 44166 }, { "epoch": 17.11, "learning_rate": 0.00047729022324865286, "loss": 0.0064, "step": 44460 }, { "epoch": 17.26, "learning_rate": 0.00047173039089898214, "loss": 0.0059, "step": 44850 }, { "epoch": 17.41, "learning_rate": 0.0004661705585493115, "loss": 0.0064, "step": 45240 }, { "epoch": 17.56, "learning_rate": 0.0004606107261996408, "loss": 0.0068, "step": 45630 }, { "epoch": 17.71, "learning_rate": 0.00045505089384997004, "loss": 0.0066, "step": 46020 }, { "epoch": 17.86, "learning_rate": 0.00044949106150029937, "loss": 0.0064, "step": 46410 }, { "epoch": 18.0, "eval_loss": 0.08296500891447067, "eval_max_distance": 14, "eval_mean_distance": 1, "eval_runtime": 0.3721, "eval_samples_per_second": 134.372, "eval_steps_per_second": 5.375, "step": 46764 }, { "epoch": 18.01, "learning_rate": 0.0004439312291506287, "loss": 0.0065, "step": 46800 }, { "epoch": 18.16, "learning_rate": 0.000438371396800958, "loss": 0.0055, "step": 47190 }, { "epoch": 18.31, "learning_rate": 0.0004328115644512873, "loss": 0.0052, "step": 47580 }, { "epoch": 18.46, "learning_rate": 0.00042725173210161665, "loss": 0.0057, "step": 47970 }, { "epoch": 18.61, "learning_rate": 0.00042169189975194593, "loss": 0.006, "step": 48360 }, { "epoch": 18.76, "learning_rate": 0.00041613206740227527, "loss": 0.0055, "step": 48750 }, { "epoch": 18.91, "learning_rate": 0.0004105722350526046, "loss": 0.0054, "step": 49140 }, { "epoch": 19.0, "eval_loss": 0.08839410543441772, "eval_max_distance": 11, "eval_mean_distance": 1, "eval_runtime": 0.367, "eval_samples_per_second": 136.245, "eval_steps_per_second": 5.45, "step": 49362 }, { "epoch": 19.06, "learning_rate": 0.0004050124027029339, "loss": 0.0057, "step": 49530 }, { "epoch": 19.21, "learning_rate": 0.0003994525703532632, "loss": 0.0047, "step": 49920 }, { "epoch": 19.36, "learning_rate": 0.0003938927380035925, "loss": 0.0048, "step": 50310 }, { "epoch": 19.52, "learning_rate": 0.00038833290565392183, "loss": 0.0052, "step": 50700 }, { "epoch": 19.67, "learning_rate": 0.00038277307330425117, "loss": 0.005, "step": 51090 }, { "epoch": 19.82, "learning_rate": 0.00037721324095458045, "loss": 0.0048, "step": 51480 }, { "epoch": 19.97, "learning_rate": 0.0003716534086049098, "loss": 0.0052, "step": 51870 }, { "epoch": 20.0, "eval_loss": 0.08214738219976425, "eval_max_distance": 10, "eval_mean_distance": 1, "eval_runtime": 0.3692, "eval_samples_per_second": 135.434, "eval_steps_per_second": 5.417, "step": 51960 }, { "epoch": 20.12, "learning_rate": 0.0003660935762552391, "loss": 0.0049, "step": 52260 }, { "epoch": 20.27, "learning_rate": 0.00036053374390556834, "loss": 0.0043, "step": 52650 }, { "epoch": 20.42, "learning_rate": 0.0003549739115558977, "loss": 0.0043, "step": 53040 }, { "epoch": 20.57, "learning_rate": 0.000349414079206227, "loss": 0.0044, "step": 53430 }, { "epoch": 20.72, "learning_rate": 0.0003438542468565563, "loss": 0.0044, "step": 53820 }, { "epoch": 20.87, "learning_rate": 0.0003382944145068856, "loss": 0.0045, "step": 54210 }, { "epoch": 21.0, "eval_loss": 0.0914614275097847, "eval_max_distance": 14, "eval_mean_distance": 1, "eval_runtime": 0.3653, "eval_samples_per_second": 136.874, "eval_steps_per_second": 5.475, "step": 54558 }, { "epoch": 21.02, "learning_rate": 0.00033273458215721496, "loss": 0.0041, "step": 54600 }, { "epoch": 21.17, "learning_rate": 0.00032717474980754424, "loss": 0.0035, "step": 54990 }, { "epoch": 21.32, "learning_rate": 0.0003216149174578736, "loss": 0.0038, "step": 55380 }, { "epoch": 21.47, "learning_rate": 0.0003160550851082029, "loss": 0.0038, "step": 55770 }, { "epoch": 21.62, "learning_rate": 0.0003104952527585322, "loss": 0.0041, "step": 56160 }, { "epoch": 21.77, "learning_rate": 0.0003049354204088615, "loss": 0.004, "step": 56550 }, { "epoch": 21.92, "learning_rate": 0.00029937558805919086, "loss": 0.0037, "step": 56940 }, { "epoch": 22.0, "eval_loss": 0.09314610809087753, "eval_max_distance": 14, "eval_mean_distance": 1, "eval_runtime": 0.3634, "eval_samples_per_second": 137.604, "eval_steps_per_second": 5.504, "step": 57156 }, { "epoch": 22.07, "learning_rate": 0.00029381575570952014, "loss": 0.0037, "step": 57330 }, { "epoch": 22.22, "learning_rate": 0.0002882559233598495, "loss": 0.0033, "step": 57720 }, { "epoch": 22.37, "learning_rate": 0.0002826960910101788, "loss": 0.0034, "step": 58110 }, { "epoch": 22.52, "learning_rate": 0.0002771362586605081, "loss": 0.0034, "step": 58500 }, { "epoch": 22.67, "learning_rate": 0.0002715764263108374, "loss": 0.0035, "step": 58890 }, { "epoch": 22.82, "learning_rate": 0.0002660165939611667, "loss": 0.0034, "step": 59280 }, { "epoch": 22.97, "learning_rate": 0.000260456761611496, "loss": 0.0036, "step": 59670 }, { "epoch": 23.0, "eval_loss": 0.09405915439128876, "eval_max_distance": 9, "eval_mean_distance": 1, "eval_runtime": 0.3715, "eval_samples_per_second": 134.573, "eval_steps_per_second": 5.383, "step": 59754 }, { "epoch": 23.12, "learning_rate": 0.0002548969292618253, "loss": 0.003, "step": 60060 }, { "epoch": 23.27, "learning_rate": 0.00024933709691215465, "loss": 0.0031, "step": 60450 }, { "epoch": 23.42, "learning_rate": 0.00024377726456248396, "loss": 0.003, "step": 60840 }, { "epoch": 23.57, "learning_rate": 0.00023821743221281327, "loss": 0.0029, "step": 61230 }, { "epoch": 23.72, "learning_rate": 0.00023265759986314258, "loss": 0.0028, "step": 61620 }, { "epoch": 23.87, "learning_rate": 0.0002270977675134719, "loss": 0.0028, "step": 62010 }, { "epoch": 24.0, "eval_loss": 0.08611776679754257, "eval_max_distance": 13, "eval_mean_distance": 1, "eval_runtime": 0.3594, "eval_samples_per_second": 139.139, "eval_steps_per_second": 5.566, "step": 62352 }, { "epoch": 24.02, "learning_rate": 0.00022153793516380122, "loss": 0.0027, "step": 62400 }, { "epoch": 24.17, "learning_rate": 0.00021597810281413053, "loss": 0.0026, "step": 62790 }, { "epoch": 24.32, "learning_rate": 0.00021041827046445986, "loss": 0.0027, "step": 63180 }, { "epoch": 24.47, "learning_rate": 0.00020485843811478917, "loss": 0.0027, "step": 63570 }, { "epoch": 24.62, "learning_rate": 0.00019929860576511847, "loss": 0.0027, "step": 63960 }, { "epoch": 24.77, "learning_rate": 0.00019373877341544778, "loss": 0.0024, "step": 64350 }, { "epoch": 24.92, "learning_rate": 0.0001881789410657771, "loss": 0.0026, "step": 64740 }, { "epoch": 25.0, "eval_loss": 0.09115344285964966, "eval_max_distance": 12, "eval_mean_distance": 1, "eval_runtime": 0.3622, "eval_samples_per_second": 138.045, "eval_steps_per_second": 5.522, "step": 64950 }, { "epoch": 25.07, "learning_rate": 0.0001826191087161064, "loss": 0.0026, "step": 65130 }, { "epoch": 25.22, "learning_rate": 0.00017705927636643573, "loss": 0.0023, "step": 65520 }, { "epoch": 25.37, "learning_rate": 0.00017149944401676504, "loss": 0.0023, "step": 65910 }, { "epoch": 25.52, "learning_rate": 0.00016593961166709435, "loss": 0.0021, "step": 66300 }, { "epoch": 25.67, "learning_rate": 0.00016037977931742368, "loss": 0.0021, "step": 66690 }, { "epoch": 25.82, "learning_rate": 0.000154819946967753, "loss": 0.0024, "step": 67080 }, { "epoch": 25.97, "learning_rate": 0.0001492601146180823, "loss": 0.0024, "step": 67470 }, { "epoch": 26.0, "eval_loss": 0.09158334881067276, "eval_max_distance": 9, "eval_mean_distance": 0, "eval_runtime": 0.3618, "eval_samples_per_second": 138.208, "eval_steps_per_second": 5.528, "step": 67548 }, { "epoch": 26.12, "learning_rate": 0.0001437002822684116, "loss": 0.0021, "step": 67860 }, { "epoch": 26.27, "learning_rate": 0.0001381404499187409, "loss": 0.0019, "step": 68250 }, { "epoch": 26.42, "learning_rate": 0.00013258061756907022, "loss": 0.002, "step": 68640 }, { "epoch": 26.57, "learning_rate": 0.00012702078521939955, "loss": 0.0019, "step": 69030 }, { "epoch": 26.72, "learning_rate": 0.00012146095286972886, "loss": 0.0021, "step": 69420 }, { "epoch": 26.87, "learning_rate": 0.00011590112052005817, "loss": 0.002, "step": 69810 }, { "epoch": 27.0, "eval_loss": 0.08878373354673386, "eval_max_distance": 9, "eval_mean_distance": 0, "eval_runtime": 0.3454, "eval_samples_per_second": 144.754, "eval_steps_per_second": 5.79, "step": 70146 }, { "epoch": 27.02, "learning_rate": 0.00011034128817038747, "loss": 0.0021, "step": 70200 }, { "epoch": 27.17, "learning_rate": 0.0001047814558207168, "loss": 0.0017, "step": 70590 }, { "epoch": 27.32, "learning_rate": 9.92216234710461e-05, "loss": 0.0018, "step": 70980 }, { "epoch": 27.47, "learning_rate": 9.366179112137542e-05, "loss": 0.0017, "step": 71370 }, { "epoch": 27.62, "learning_rate": 8.810195877170473e-05, "loss": 0.0016, "step": 71760 }, { "epoch": 27.77, "learning_rate": 8.254212642203404e-05, "loss": 0.002, "step": 72150 }, { "epoch": 27.92, "learning_rate": 7.698229407236336e-05, "loss": 0.0017, "step": 72540 }, { "epoch": 28.0, "eval_loss": 0.08879587054252625, "eval_max_distance": 9, "eval_mean_distance": 0, "eval_runtime": 0.3476, "eval_samples_per_second": 143.846, "eval_steps_per_second": 5.754, "step": 72744 }, { "epoch": 28.07, "learning_rate": 7.142246172269268e-05, "loss": 0.0016, "step": 72930 }, { "epoch": 28.22, "learning_rate": 6.586262937302199e-05, "loss": 0.0016, "step": 73320 }, { "epoch": 28.37, "learning_rate": 6.03027970233513e-05, "loss": 0.0016, "step": 73710 }, { "epoch": 28.52, "learning_rate": 5.474296467368061e-05, "loss": 0.0016, "step": 74100 }, { "epoch": 28.67, "learning_rate": 4.9183132324009924e-05, "loss": 0.0016, "step": 74490 }, { "epoch": 28.82, "learning_rate": 4.362329997433924e-05, "loss": 0.0017, "step": 74880 }, { "epoch": 28.97, "learning_rate": 3.806346762466855e-05, "loss": 0.0017, "step": 75270 }, { "epoch": 29.0, "eval_loss": 0.09515639394521713, "eval_max_distance": 9, "eval_mean_distance": 0, "eval_runtime": 0.343, "eval_samples_per_second": 145.752, "eval_steps_per_second": 5.83, "step": 75342 }, { "epoch": 29.12, "learning_rate": 3.250363527499786e-05, "loss": 0.0015, "step": 75660 }, { "epoch": 29.27, "learning_rate": 2.6943802925327177e-05, "loss": 0.0016, "step": 76050 }, { "epoch": 29.42, "learning_rate": 2.1383970575656488e-05, "loss": 0.0014, "step": 76440 }, { "epoch": 29.57, "learning_rate": 1.5824138225985802e-05, "loss": 0.0013, "step": 76830 }, { "epoch": 29.72, "learning_rate": 1.0264305876315115e-05, "loss": 0.0014, "step": 77220 }, { "epoch": 29.87, "learning_rate": 4.704473526644427e-06, "loss": 0.0014, "step": 77610 }, { "epoch": 30.0, "eval_loss": 0.09847646951675415, "eval_max_distance": 9, "eval_mean_distance": 0, "eval_runtime": 0.3435, "eval_samples_per_second": 145.564, "eval_steps_per_second": 5.823, "step": 77940 }, { "epoch": 30.0, "step": 77940, "total_flos": 4.517674593940685e+16, "train_loss": 0.053724035134690526, "train_runtime": 6582.4117, "train_samples_per_second": 355.137, "train_steps_per_second": 11.841 } ], "logging_steps": 390, "max_steps": 77940, "num_train_epochs": 30, "save_steps": 780, "total_flos": 4.517674593940685e+16, "trial_name": null, "trial_params": null }