|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 77940, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.2830382345393893e-07, |
|
"loss": 13.6904, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.003849114703618e-05, |
|
"loss": 5.273, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00010007698229407236, |
|
"loss": 0.4624, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00015011547344110854, |
|
"loss": 0.3258, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00020015396458814472, |
|
"loss": 0.2553, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002501924557351809, |
|
"loss": 0.2143, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0003002309468822171, |
|
"loss": 0.181, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.21401263773441315, |
|
"eval_max_distance": 36, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 0.4225, |
|
"eval_samples_per_second": 118.338, |
|
"eval_steps_per_second": 4.734, |
|
"step": 2598 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0003502694380292533, |
|
"loss": 0.1557, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00040030792917628943, |
|
"loss": 0.1402, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00045034642032332564, |
|
"loss": 0.1283, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0005003849114703618, |
|
"loss": 0.1194, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0005504234026173979, |
|
"loss": 0.1158, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0006004618937644341, |
|
"loss": 0.108, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0006505003849114704, |
|
"loss": 0.1067, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.14157189428806305, |
|
"eval_max_distance": 29, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 0.4066, |
|
"eval_samples_per_second": 122.96, |
|
"eval_steps_per_second": 4.918, |
|
"step": 5196 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0007005388760585066, |
|
"loss": 0.0924, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0007505773672055427, |
|
"loss": 0.0927, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0008006158583525789, |
|
"loss": 0.0872, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0008506543494996151, |
|
"loss": 0.0841, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0009006928406466513, |
|
"loss": 0.0808, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0009507313317936874, |
|
"loss": 0.0801, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.10980188101530075, |
|
"eval_max_distance": 22, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 0.4052, |
|
"eval_samples_per_second": 123.401, |
|
"eval_steps_per_second": 4.936, |
|
"step": 7794 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0009999144641176974, |
|
"loss": 0.0778, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.0009943546317680268, |
|
"loss": 0.0684, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.000988794799418356, |
|
"loss": 0.0692, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.0009832349670686853, |
|
"loss": 0.0629, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0009776751347190145, |
|
"loss": 0.0653, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.0009721153023693439, |
|
"loss": 0.0595, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.0009665554700196733, |
|
"loss": 0.0575, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.10807737708091736, |
|
"eval_max_distance": 18, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 0.3927, |
|
"eval_samples_per_second": 127.308, |
|
"eval_steps_per_second": 5.092, |
|
"step": 10392 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.0009609956376700025, |
|
"loss": 0.1012, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0009554358053203319, |
|
"loss": 0.0526, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0009498759729706612, |
|
"loss": 0.0496, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0009443161406209905, |
|
"loss": 0.0492, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.0009387563082713198, |
|
"loss": 0.0483, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0009331964759216492, |
|
"loss": 0.0469, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0009276366435719784, |
|
"loss": 0.0452, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.08966636657714844, |
|
"eval_max_distance": 14, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3879, |
|
"eval_samples_per_second": 128.906, |
|
"eval_steps_per_second": 5.156, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0009220768112223078, |
|
"loss": 0.0392, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.0009165169788726371, |
|
"loss": 0.0397, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.0009109571465229664, |
|
"loss": 0.0393, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.0009053973141732957, |
|
"loss": 0.0399, |
|
"step": 14430 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.0008998374818236251, |
|
"loss": 0.039, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.0008942776494739543, |
|
"loss": 0.0372, |
|
"step": 15210 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.07197271287441254, |
|
"eval_max_distance": 15, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3764, |
|
"eval_samples_per_second": 132.832, |
|
"eval_steps_per_second": 5.313, |
|
"step": 15588 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0008887178171242837, |
|
"loss": 0.039, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.000883157984774613, |
|
"loss": 0.0312, |
|
"step": 15990 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0008775981524249422, |
|
"loss": 0.0337, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.0008720383200752716, |
|
"loss": 0.0328, |
|
"step": 16770 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.0008664784877256009, |
|
"loss": 0.0327, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.0008609186553759302, |
|
"loss": 0.0305, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 0.0008553588230262595, |
|
"loss": 0.0323, |
|
"step": 17940 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.08398188650608063, |
|
"eval_max_distance": 12, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3765, |
|
"eval_samples_per_second": 132.8, |
|
"eval_steps_per_second": 5.312, |
|
"step": 18186 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 0.0008497989906765889, |
|
"loss": 0.0286, |
|
"step": 18330 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.0008442391583269181, |
|
"loss": 0.0263, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.0008386793259772475, |
|
"loss": 0.0269, |
|
"step": 19110 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.0008331194936275768, |
|
"loss": 0.0268, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.0008275596612779061, |
|
"loss": 0.0283, |
|
"step": 19890 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 0.0008219998289282354, |
|
"loss": 0.0274, |
|
"step": 20280 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.0008164399965785648, |
|
"loss": 0.0267, |
|
"step": 20670 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.07682657241821289, |
|
"eval_max_distance": 16, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3687, |
|
"eval_samples_per_second": 135.62, |
|
"eval_steps_per_second": 5.425, |
|
"step": 20784 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 0.000810880164228894, |
|
"loss": 0.0235, |
|
"step": 21060 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 0.0008053203318792234, |
|
"loss": 0.0221, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 0.0007997604995295527, |
|
"loss": 0.0221, |
|
"step": 21840 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 0.0007942006671798819, |
|
"loss": 0.0223, |
|
"step": 22230 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 0.0007886408348302113, |
|
"loss": 0.0233, |
|
"step": 22620 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 0.0007830810024805405, |
|
"loss": 0.0231, |
|
"step": 23010 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.06973634660243988, |
|
"eval_max_distance": 10, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3759, |
|
"eval_samples_per_second": 133.026, |
|
"eval_steps_per_second": 5.321, |
|
"step": 23382 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 0.0007775211701308699, |
|
"loss": 0.0227, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 0.0007719613377811992, |
|
"loss": 0.0185, |
|
"step": 23790 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 0.0007664015054315285, |
|
"loss": 0.0183, |
|
"step": 24180 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 0.0007608416730818578, |
|
"loss": 0.0191, |
|
"step": 24570 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 0.0007552818407321872, |
|
"loss": 0.019, |
|
"step": 24960 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 0.0007497220083825164, |
|
"loss": 0.0193, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 0.0007441621760328458, |
|
"loss": 0.0199, |
|
"step": 25740 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.07169829308986664, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3704, |
|
"eval_samples_per_second": 134.993, |
|
"eval_steps_per_second": 5.4, |
|
"step": 25980 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 0.0007386023436831751, |
|
"loss": 0.0184, |
|
"step": 26130 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.0007330425113335044, |
|
"loss": 0.016, |
|
"step": 26520 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"learning_rate": 0.0007274826789838337, |
|
"loss": 0.0164, |
|
"step": 26910 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 0.0007219228466341631, |
|
"loss": 0.016, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 0.0007163630142844923, |
|
"loss": 0.0169, |
|
"step": 27690 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 0.0007108031819348217, |
|
"loss": 0.0165, |
|
"step": 28080 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 0.000705243349585151, |
|
"loss": 0.0168, |
|
"step": 28470 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.08123478293418884, |
|
"eval_max_distance": 16, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3865, |
|
"eval_samples_per_second": 129.356, |
|
"eval_steps_per_second": 5.174, |
|
"step": 28578 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 0.0006996835172354803, |
|
"loss": 0.015, |
|
"step": 28860 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 0.0006941236848858096, |
|
"loss": 0.0137, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 0.0006885638525361389, |
|
"loss": 0.0151, |
|
"step": 29640 |
|
}, |
|
{ |
|
"epoch": 11.56, |
|
"learning_rate": 0.0006830040201864682, |
|
"loss": 0.0144, |
|
"step": 30030 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 0.0006774441878367975, |
|
"loss": 0.0147, |
|
"step": 30420 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.0006718843554871269, |
|
"loss": 0.0148, |
|
"step": 30810 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.09610763192176819, |
|
"eval_max_distance": 12, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3633, |
|
"eval_samples_per_second": 137.639, |
|
"eval_steps_per_second": 5.506, |
|
"step": 31176 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 0.0006663245231374561, |
|
"loss": 0.0145, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 0.0006607646907877855, |
|
"loss": 0.0124, |
|
"step": 31590 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 0.0006552048584381148, |
|
"loss": 0.0117, |
|
"step": 31980 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 0.0006496450260884441, |
|
"loss": 0.0121, |
|
"step": 32370 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 0.0006440851937387734, |
|
"loss": 0.0124, |
|
"step": 32760 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 0.0006385253613891028, |
|
"loss": 0.0125, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 0.000632965529039432, |
|
"loss": 0.0128, |
|
"step": 33540 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.08225859701633453, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3712, |
|
"eval_samples_per_second": 134.695, |
|
"eval_steps_per_second": 5.388, |
|
"step": 33774 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 0.0006274056966897614, |
|
"loss": 0.0116, |
|
"step": 33930 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 0.0006218458643400907, |
|
"loss": 0.0106, |
|
"step": 34320 |
|
}, |
|
{ |
|
"epoch": 13.36, |
|
"learning_rate": 0.00061628603199042, |
|
"loss": 0.0104, |
|
"step": 34710 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 0.0006107261996407493, |
|
"loss": 0.011, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"learning_rate": 0.0006051663672910787, |
|
"loss": 0.0108, |
|
"step": 35490 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 0.0005996065349414079, |
|
"loss": 0.0111, |
|
"step": 35880 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 0.0005940467025917372, |
|
"loss": 0.0112, |
|
"step": 36270 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.07655028253793716, |
|
"eval_max_distance": 12, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.361, |
|
"eval_samples_per_second": 138.506, |
|
"eval_steps_per_second": 5.54, |
|
"step": 36372 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 0.0005884868702420666, |
|
"loss": 0.0098, |
|
"step": 36660 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"learning_rate": 0.0005829270378923958, |
|
"loss": 0.009, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 0.0005773672055427252, |
|
"loss": 0.0093, |
|
"step": 37440 |
|
}, |
|
{ |
|
"epoch": 14.56, |
|
"learning_rate": 0.0005718073731930545, |
|
"loss": 0.0095, |
|
"step": 37830 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 0.0005662475408433838, |
|
"loss": 0.0093, |
|
"step": 38220 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"learning_rate": 0.0005606877084937131, |
|
"loss": 0.0093, |
|
"step": 38610 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.07127052545547485, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3671, |
|
"eval_samples_per_second": 136.219, |
|
"eval_steps_per_second": 5.449, |
|
"step": 38970 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 0.0005551278761440425, |
|
"loss": 0.0101, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 0.0005495680437943717, |
|
"loss": 0.0078, |
|
"step": 39390 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 0.0005440082114447011, |
|
"loss": 0.0079, |
|
"step": 39780 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"learning_rate": 0.0005384483790950304, |
|
"loss": 0.0081, |
|
"step": 40170 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 0.0005328885467453597, |
|
"loss": 0.0085, |
|
"step": 40560 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 0.000527328714395689, |
|
"loss": 0.0088, |
|
"step": 40950 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 0.0005217688820460184, |
|
"loss": 0.0083, |
|
"step": 41340 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.08469703793525696, |
|
"eval_max_distance": 14, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3815, |
|
"eval_samples_per_second": 131.073, |
|
"eval_steps_per_second": 5.243, |
|
"step": 41568 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 0.0005162090496963476, |
|
"loss": 0.0081, |
|
"step": 41730 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 0.000510649217346677, |
|
"loss": 0.0069, |
|
"step": 42120 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 0.0005050893849970063, |
|
"loss": 0.007, |
|
"step": 42510 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 0.0004995295526473355, |
|
"loss": 0.0071, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 0.0004939697202976649, |
|
"loss": 0.0073, |
|
"step": 43290 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 0.0004884098879479942, |
|
"loss": 0.0076, |
|
"step": 43680 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 0.0004828500555983235, |
|
"loss": 0.0076, |
|
"step": 44070 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.08625645935535431, |
|
"eval_max_distance": 11, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3551, |
|
"eval_samples_per_second": 140.8, |
|
"eval_steps_per_second": 5.632, |
|
"step": 44166 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 0.00047729022324865286, |
|
"loss": 0.0064, |
|
"step": 44460 |
|
}, |
|
{ |
|
"epoch": 17.26, |
|
"learning_rate": 0.00047173039089898214, |
|
"loss": 0.0059, |
|
"step": 44850 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 0.0004661705585493115, |
|
"loss": 0.0064, |
|
"step": 45240 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 0.0004606107261996408, |
|
"loss": 0.0068, |
|
"step": 45630 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 0.00045505089384997004, |
|
"loss": 0.0066, |
|
"step": 46020 |
|
}, |
|
{ |
|
"epoch": 17.86, |
|
"learning_rate": 0.00044949106150029937, |
|
"loss": 0.0064, |
|
"step": 46410 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.08296500891447067, |
|
"eval_max_distance": 14, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3721, |
|
"eval_samples_per_second": 134.372, |
|
"eval_steps_per_second": 5.375, |
|
"step": 46764 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 0.0004439312291506287, |
|
"loss": 0.0065, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 0.000438371396800958, |
|
"loss": 0.0055, |
|
"step": 47190 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"learning_rate": 0.0004328115644512873, |
|
"loss": 0.0052, |
|
"step": 47580 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 0.00042725173210161665, |
|
"loss": 0.0057, |
|
"step": 47970 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 0.00042169189975194593, |
|
"loss": 0.006, |
|
"step": 48360 |
|
}, |
|
{ |
|
"epoch": 18.76, |
|
"learning_rate": 0.00041613206740227527, |
|
"loss": 0.0055, |
|
"step": 48750 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 0.0004105722350526046, |
|
"loss": 0.0054, |
|
"step": 49140 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.08839410543441772, |
|
"eval_max_distance": 11, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.367, |
|
"eval_samples_per_second": 136.245, |
|
"eval_steps_per_second": 5.45, |
|
"step": 49362 |
|
}, |
|
{ |
|
"epoch": 19.06, |
|
"learning_rate": 0.0004050124027029339, |
|
"loss": 0.0057, |
|
"step": 49530 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"learning_rate": 0.0003994525703532632, |
|
"loss": 0.0047, |
|
"step": 49920 |
|
}, |
|
{ |
|
"epoch": 19.36, |
|
"learning_rate": 0.0003938927380035925, |
|
"loss": 0.0048, |
|
"step": 50310 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 0.00038833290565392183, |
|
"loss": 0.0052, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 0.00038277307330425117, |
|
"loss": 0.005, |
|
"step": 51090 |
|
}, |
|
{ |
|
"epoch": 19.82, |
|
"learning_rate": 0.00037721324095458045, |
|
"loss": 0.0048, |
|
"step": 51480 |
|
}, |
|
{ |
|
"epoch": 19.97, |
|
"learning_rate": 0.0003716534086049098, |
|
"loss": 0.0052, |
|
"step": 51870 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.08214738219976425, |
|
"eval_max_distance": 10, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3692, |
|
"eval_samples_per_second": 135.434, |
|
"eval_steps_per_second": 5.417, |
|
"step": 51960 |
|
}, |
|
{ |
|
"epoch": 20.12, |
|
"learning_rate": 0.0003660935762552391, |
|
"loss": 0.0049, |
|
"step": 52260 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 0.00036053374390556834, |
|
"loss": 0.0043, |
|
"step": 52650 |
|
}, |
|
{ |
|
"epoch": 20.42, |
|
"learning_rate": 0.0003549739115558977, |
|
"loss": 0.0043, |
|
"step": 53040 |
|
}, |
|
{ |
|
"epoch": 20.57, |
|
"learning_rate": 0.000349414079206227, |
|
"loss": 0.0044, |
|
"step": 53430 |
|
}, |
|
{ |
|
"epoch": 20.72, |
|
"learning_rate": 0.0003438542468565563, |
|
"loss": 0.0044, |
|
"step": 53820 |
|
}, |
|
{ |
|
"epoch": 20.87, |
|
"learning_rate": 0.0003382944145068856, |
|
"loss": 0.0045, |
|
"step": 54210 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.0914614275097847, |
|
"eval_max_distance": 14, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3653, |
|
"eval_samples_per_second": 136.874, |
|
"eval_steps_per_second": 5.475, |
|
"step": 54558 |
|
}, |
|
{ |
|
"epoch": 21.02, |
|
"learning_rate": 0.00033273458215721496, |
|
"loss": 0.0041, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 21.17, |
|
"learning_rate": 0.00032717474980754424, |
|
"loss": 0.0035, |
|
"step": 54990 |
|
}, |
|
{ |
|
"epoch": 21.32, |
|
"learning_rate": 0.0003216149174578736, |
|
"loss": 0.0038, |
|
"step": 55380 |
|
}, |
|
{ |
|
"epoch": 21.47, |
|
"learning_rate": 0.0003160550851082029, |
|
"loss": 0.0038, |
|
"step": 55770 |
|
}, |
|
{ |
|
"epoch": 21.62, |
|
"learning_rate": 0.0003104952527585322, |
|
"loss": 0.0041, |
|
"step": 56160 |
|
}, |
|
{ |
|
"epoch": 21.77, |
|
"learning_rate": 0.0003049354204088615, |
|
"loss": 0.004, |
|
"step": 56550 |
|
}, |
|
{ |
|
"epoch": 21.92, |
|
"learning_rate": 0.00029937558805919086, |
|
"loss": 0.0037, |
|
"step": 56940 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.09314610809087753, |
|
"eval_max_distance": 14, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3634, |
|
"eval_samples_per_second": 137.604, |
|
"eval_steps_per_second": 5.504, |
|
"step": 57156 |
|
}, |
|
{ |
|
"epoch": 22.07, |
|
"learning_rate": 0.00029381575570952014, |
|
"loss": 0.0037, |
|
"step": 57330 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"learning_rate": 0.0002882559233598495, |
|
"loss": 0.0033, |
|
"step": 57720 |
|
}, |
|
{ |
|
"epoch": 22.37, |
|
"learning_rate": 0.0002826960910101788, |
|
"loss": 0.0034, |
|
"step": 58110 |
|
}, |
|
{ |
|
"epoch": 22.52, |
|
"learning_rate": 0.0002771362586605081, |
|
"loss": 0.0034, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 22.67, |
|
"learning_rate": 0.0002715764263108374, |
|
"loss": 0.0035, |
|
"step": 58890 |
|
}, |
|
{ |
|
"epoch": 22.82, |
|
"learning_rate": 0.0002660165939611667, |
|
"loss": 0.0034, |
|
"step": 59280 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"learning_rate": 0.000260456761611496, |
|
"loss": 0.0036, |
|
"step": 59670 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 0.09405915439128876, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3715, |
|
"eval_samples_per_second": 134.573, |
|
"eval_steps_per_second": 5.383, |
|
"step": 59754 |
|
}, |
|
{ |
|
"epoch": 23.12, |
|
"learning_rate": 0.0002548969292618253, |
|
"loss": 0.003, |
|
"step": 60060 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"learning_rate": 0.00024933709691215465, |
|
"loss": 0.0031, |
|
"step": 60450 |
|
}, |
|
{ |
|
"epoch": 23.42, |
|
"learning_rate": 0.00024377726456248396, |
|
"loss": 0.003, |
|
"step": 60840 |
|
}, |
|
{ |
|
"epoch": 23.57, |
|
"learning_rate": 0.00023821743221281327, |
|
"loss": 0.0029, |
|
"step": 61230 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"learning_rate": 0.00023265759986314258, |
|
"loss": 0.0028, |
|
"step": 61620 |
|
}, |
|
{ |
|
"epoch": 23.87, |
|
"learning_rate": 0.0002270977675134719, |
|
"loss": 0.0028, |
|
"step": 62010 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.08611776679754257, |
|
"eval_max_distance": 13, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3594, |
|
"eval_samples_per_second": 139.139, |
|
"eval_steps_per_second": 5.566, |
|
"step": 62352 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 0.00022153793516380122, |
|
"loss": 0.0027, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 24.17, |
|
"learning_rate": 0.00021597810281413053, |
|
"loss": 0.0026, |
|
"step": 62790 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"learning_rate": 0.00021041827046445986, |
|
"loss": 0.0027, |
|
"step": 63180 |
|
}, |
|
{ |
|
"epoch": 24.47, |
|
"learning_rate": 0.00020485843811478917, |
|
"loss": 0.0027, |
|
"step": 63570 |
|
}, |
|
{ |
|
"epoch": 24.62, |
|
"learning_rate": 0.00019929860576511847, |
|
"loss": 0.0027, |
|
"step": 63960 |
|
}, |
|
{ |
|
"epoch": 24.77, |
|
"learning_rate": 0.00019373877341544778, |
|
"loss": 0.0024, |
|
"step": 64350 |
|
}, |
|
{ |
|
"epoch": 24.92, |
|
"learning_rate": 0.0001881789410657771, |
|
"loss": 0.0026, |
|
"step": 64740 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.09115344285964966, |
|
"eval_max_distance": 12, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 0.3622, |
|
"eval_samples_per_second": 138.045, |
|
"eval_steps_per_second": 5.522, |
|
"step": 64950 |
|
}, |
|
{ |
|
"epoch": 25.07, |
|
"learning_rate": 0.0001826191087161064, |
|
"loss": 0.0026, |
|
"step": 65130 |
|
}, |
|
{ |
|
"epoch": 25.22, |
|
"learning_rate": 0.00017705927636643573, |
|
"loss": 0.0023, |
|
"step": 65520 |
|
}, |
|
{ |
|
"epoch": 25.37, |
|
"learning_rate": 0.00017149944401676504, |
|
"loss": 0.0023, |
|
"step": 65910 |
|
}, |
|
{ |
|
"epoch": 25.52, |
|
"learning_rate": 0.00016593961166709435, |
|
"loss": 0.0021, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 25.67, |
|
"learning_rate": 0.00016037977931742368, |
|
"loss": 0.0021, |
|
"step": 66690 |
|
}, |
|
{ |
|
"epoch": 25.82, |
|
"learning_rate": 0.000154819946967753, |
|
"loss": 0.0024, |
|
"step": 67080 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"learning_rate": 0.0001492601146180823, |
|
"loss": 0.0024, |
|
"step": 67470 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.09158334881067276, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.3618, |
|
"eval_samples_per_second": 138.208, |
|
"eval_steps_per_second": 5.528, |
|
"step": 67548 |
|
}, |
|
{ |
|
"epoch": 26.12, |
|
"learning_rate": 0.0001437002822684116, |
|
"loss": 0.0021, |
|
"step": 67860 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"learning_rate": 0.0001381404499187409, |
|
"loss": 0.0019, |
|
"step": 68250 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"learning_rate": 0.00013258061756907022, |
|
"loss": 0.002, |
|
"step": 68640 |
|
}, |
|
{ |
|
"epoch": 26.57, |
|
"learning_rate": 0.00012702078521939955, |
|
"loss": 0.0019, |
|
"step": 69030 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"learning_rate": 0.00012146095286972886, |
|
"loss": 0.0021, |
|
"step": 69420 |
|
}, |
|
{ |
|
"epoch": 26.87, |
|
"learning_rate": 0.00011590112052005817, |
|
"loss": 0.002, |
|
"step": 69810 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 0.08878373354673386, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.3454, |
|
"eval_samples_per_second": 144.754, |
|
"eval_steps_per_second": 5.79, |
|
"step": 70146 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 0.00011034128817038747, |
|
"loss": 0.0021, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 27.17, |
|
"learning_rate": 0.0001047814558207168, |
|
"loss": 0.0017, |
|
"step": 70590 |
|
}, |
|
{ |
|
"epoch": 27.32, |
|
"learning_rate": 9.92216234710461e-05, |
|
"loss": 0.0018, |
|
"step": 70980 |
|
}, |
|
{ |
|
"epoch": 27.47, |
|
"learning_rate": 9.366179112137542e-05, |
|
"loss": 0.0017, |
|
"step": 71370 |
|
}, |
|
{ |
|
"epoch": 27.62, |
|
"learning_rate": 8.810195877170473e-05, |
|
"loss": 0.0016, |
|
"step": 71760 |
|
}, |
|
{ |
|
"epoch": 27.77, |
|
"learning_rate": 8.254212642203404e-05, |
|
"loss": 0.002, |
|
"step": 72150 |
|
}, |
|
{ |
|
"epoch": 27.92, |
|
"learning_rate": 7.698229407236336e-05, |
|
"loss": 0.0017, |
|
"step": 72540 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.08879587054252625, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.3476, |
|
"eval_samples_per_second": 143.846, |
|
"eval_steps_per_second": 5.754, |
|
"step": 72744 |
|
}, |
|
{ |
|
"epoch": 28.07, |
|
"learning_rate": 7.142246172269268e-05, |
|
"loss": 0.0016, |
|
"step": 72930 |
|
}, |
|
{ |
|
"epoch": 28.22, |
|
"learning_rate": 6.586262937302199e-05, |
|
"loss": 0.0016, |
|
"step": 73320 |
|
}, |
|
{ |
|
"epoch": 28.37, |
|
"learning_rate": 6.03027970233513e-05, |
|
"loss": 0.0016, |
|
"step": 73710 |
|
}, |
|
{ |
|
"epoch": 28.52, |
|
"learning_rate": 5.474296467368061e-05, |
|
"loss": 0.0016, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 28.67, |
|
"learning_rate": 4.9183132324009924e-05, |
|
"loss": 0.0016, |
|
"step": 74490 |
|
}, |
|
{ |
|
"epoch": 28.82, |
|
"learning_rate": 4.362329997433924e-05, |
|
"loss": 0.0017, |
|
"step": 74880 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"learning_rate": 3.806346762466855e-05, |
|
"loss": 0.0017, |
|
"step": 75270 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.09515639394521713, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.343, |
|
"eval_samples_per_second": 145.752, |
|
"eval_steps_per_second": 5.83, |
|
"step": 75342 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"learning_rate": 3.250363527499786e-05, |
|
"loss": 0.0015, |
|
"step": 75660 |
|
}, |
|
{ |
|
"epoch": 29.27, |
|
"learning_rate": 2.6943802925327177e-05, |
|
"loss": 0.0016, |
|
"step": 76050 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"learning_rate": 2.1383970575656488e-05, |
|
"loss": 0.0014, |
|
"step": 76440 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"learning_rate": 1.5824138225985802e-05, |
|
"loss": 0.0013, |
|
"step": 76830 |
|
}, |
|
{ |
|
"epoch": 29.72, |
|
"learning_rate": 1.0264305876315115e-05, |
|
"loss": 0.0014, |
|
"step": 77220 |
|
}, |
|
{ |
|
"epoch": 29.87, |
|
"learning_rate": 4.704473526644427e-06, |
|
"loss": 0.0014, |
|
"step": 77610 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.09847646951675415, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.3435, |
|
"eval_samples_per_second": 145.564, |
|
"eval_steps_per_second": 5.823, |
|
"step": 77940 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 77940, |
|
"total_flos": 4.517674593940685e+16, |
|
"train_loss": 0.053724035134690526, |
|
"train_runtime": 6582.4117, |
|
"train_samples_per_second": 355.137, |
|
"train_steps_per_second": 11.841 |
|
} |
|
], |
|
"logging_steps": 390, |
|
"max_steps": 77940, |
|
"num_train_epochs": 30, |
|
"save_steps": 780, |
|
"total_flos": 4.517674593940685e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|