text-normalization-ru-new / trainer_state.json
alexue4's picture
End of training
1ce4365
raw
history blame
33.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"eval_steps": 500,
"global_step": 77940,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.2830382345393893e-07,
"loss": 13.6904,
"step": 1
},
{
"epoch": 0.15,
"learning_rate": 5.003849114703618e-05,
"loss": 5.273,
"step": 390
},
{
"epoch": 0.3,
"learning_rate": 0.00010007698229407236,
"loss": 0.4624,
"step": 780
},
{
"epoch": 0.45,
"learning_rate": 0.00015011547344110854,
"loss": 0.3258,
"step": 1170
},
{
"epoch": 0.6,
"learning_rate": 0.00020015396458814472,
"loss": 0.2553,
"step": 1560
},
{
"epoch": 0.75,
"learning_rate": 0.0002501924557351809,
"loss": 0.2143,
"step": 1950
},
{
"epoch": 0.9,
"learning_rate": 0.0003002309468822171,
"loss": 0.181,
"step": 2340
},
{
"epoch": 1.0,
"eval_loss": 0.21401263773441315,
"eval_max_distance": 36,
"eval_mean_distance": 4,
"eval_runtime": 0.4225,
"eval_samples_per_second": 118.338,
"eval_steps_per_second": 4.734,
"step": 2598
},
{
"epoch": 1.05,
"learning_rate": 0.0003502694380292533,
"loss": 0.1557,
"step": 2730
},
{
"epoch": 1.2,
"learning_rate": 0.00040030792917628943,
"loss": 0.1402,
"step": 3120
},
{
"epoch": 1.35,
"learning_rate": 0.00045034642032332564,
"loss": 0.1283,
"step": 3510
},
{
"epoch": 1.5,
"learning_rate": 0.0005003849114703618,
"loss": 0.1194,
"step": 3900
},
{
"epoch": 1.65,
"learning_rate": 0.0005504234026173979,
"loss": 0.1158,
"step": 4290
},
{
"epoch": 1.8,
"learning_rate": 0.0006004618937644341,
"loss": 0.108,
"step": 4680
},
{
"epoch": 1.95,
"learning_rate": 0.0006505003849114704,
"loss": 0.1067,
"step": 5070
},
{
"epoch": 2.0,
"eval_loss": 0.14157189428806305,
"eval_max_distance": 29,
"eval_mean_distance": 2,
"eval_runtime": 0.4066,
"eval_samples_per_second": 122.96,
"eval_steps_per_second": 4.918,
"step": 5196
},
{
"epoch": 2.1,
"learning_rate": 0.0007005388760585066,
"loss": 0.0924,
"step": 5460
},
{
"epoch": 2.25,
"learning_rate": 0.0007505773672055427,
"loss": 0.0927,
"step": 5850
},
{
"epoch": 2.4,
"learning_rate": 0.0008006158583525789,
"loss": 0.0872,
"step": 6240
},
{
"epoch": 2.55,
"learning_rate": 0.0008506543494996151,
"loss": 0.0841,
"step": 6630
},
{
"epoch": 2.7,
"learning_rate": 0.0009006928406466513,
"loss": 0.0808,
"step": 7020
},
{
"epoch": 2.85,
"learning_rate": 0.0009507313317936874,
"loss": 0.0801,
"step": 7410
},
{
"epoch": 3.0,
"eval_loss": 0.10980188101530075,
"eval_max_distance": 22,
"eval_mean_distance": 2,
"eval_runtime": 0.4052,
"eval_samples_per_second": 123.401,
"eval_steps_per_second": 4.936,
"step": 7794
},
{
"epoch": 3.0,
"learning_rate": 0.0009999144641176974,
"loss": 0.0778,
"step": 7800
},
{
"epoch": 3.15,
"learning_rate": 0.0009943546317680268,
"loss": 0.0684,
"step": 8190
},
{
"epoch": 3.3,
"learning_rate": 0.000988794799418356,
"loss": 0.0692,
"step": 8580
},
{
"epoch": 3.45,
"learning_rate": 0.0009832349670686853,
"loss": 0.0629,
"step": 8970
},
{
"epoch": 3.6,
"learning_rate": 0.0009776751347190145,
"loss": 0.0653,
"step": 9360
},
{
"epoch": 3.75,
"learning_rate": 0.0009721153023693439,
"loss": 0.0595,
"step": 9750
},
{
"epoch": 3.9,
"learning_rate": 0.0009665554700196733,
"loss": 0.0575,
"step": 10140
},
{
"epoch": 4.0,
"eval_loss": 0.10807737708091736,
"eval_max_distance": 18,
"eval_mean_distance": 2,
"eval_runtime": 0.3927,
"eval_samples_per_second": 127.308,
"eval_steps_per_second": 5.092,
"step": 10392
},
{
"epoch": 4.05,
"learning_rate": 0.0009609956376700025,
"loss": 0.1012,
"step": 10530
},
{
"epoch": 4.2,
"learning_rate": 0.0009554358053203319,
"loss": 0.0526,
"step": 10920
},
{
"epoch": 4.35,
"learning_rate": 0.0009498759729706612,
"loss": 0.0496,
"step": 11310
},
{
"epoch": 4.5,
"learning_rate": 0.0009443161406209905,
"loss": 0.0492,
"step": 11700
},
{
"epoch": 4.65,
"learning_rate": 0.0009387563082713198,
"loss": 0.0483,
"step": 12090
},
{
"epoch": 4.8,
"learning_rate": 0.0009331964759216492,
"loss": 0.0469,
"step": 12480
},
{
"epoch": 4.95,
"learning_rate": 0.0009276366435719784,
"loss": 0.0452,
"step": 12870
},
{
"epoch": 5.0,
"eval_loss": 0.08966636657714844,
"eval_max_distance": 14,
"eval_mean_distance": 1,
"eval_runtime": 0.3879,
"eval_samples_per_second": 128.906,
"eval_steps_per_second": 5.156,
"step": 12990
},
{
"epoch": 5.1,
"learning_rate": 0.0009220768112223078,
"loss": 0.0392,
"step": 13260
},
{
"epoch": 5.25,
"learning_rate": 0.0009165169788726371,
"loss": 0.0397,
"step": 13650
},
{
"epoch": 5.4,
"learning_rate": 0.0009109571465229664,
"loss": 0.0393,
"step": 14040
},
{
"epoch": 5.55,
"learning_rate": 0.0009053973141732957,
"loss": 0.0399,
"step": 14430
},
{
"epoch": 5.7,
"learning_rate": 0.0008998374818236251,
"loss": 0.039,
"step": 14820
},
{
"epoch": 5.85,
"learning_rate": 0.0008942776494739543,
"loss": 0.0372,
"step": 15210
},
{
"epoch": 6.0,
"eval_loss": 0.07197271287441254,
"eval_max_distance": 15,
"eval_mean_distance": 1,
"eval_runtime": 0.3764,
"eval_samples_per_second": 132.832,
"eval_steps_per_second": 5.313,
"step": 15588
},
{
"epoch": 6.0,
"learning_rate": 0.0008887178171242837,
"loss": 0.039,
"step": 15600
},
{
"epoch": 6.15,
"learning_rate": 0.000883157984774613,
"loss": 0.0312,
"step": 15990
},
{
"epoch": 6.3,
"learning_rate": 0.0008775981524249422,
"loss": 0.0337,
"step": 16380
},
{
"epoch": 6.45,
"learning_rate": 0.0008720383200752716,
"loss": 0.0328,
"step": 16770
},
{
"epoch": 6.61,
"learning_rate": 0.0008664784877256009,
"loss": 0.0327,
"step": 17160
},
{
"epoch": 6.76,
"learning_rate": 0.0008609186553759302,
"loss": 0.0305,
"step": 17550
},
{
"epoch": 6.91,
"learning_rate": 0.0008553588230262595,
"loss": 0.0323,
"step": 17940
},
{
"epoch": 7.0,
"eval_loss": 0.08398188650608063,
"eval_max_distance": 12,
"eval_mean_distance": 1,
"eval_runtime": 0.3765,
"eval_samples_per_second": 132.8,
"eval_steps_per_second": 5.312,
"step": 18186
},
{
"epoch": 7.06,
"learning_rate": 0.0008497989906765889,
"loss": 0.0286,
"step": 18330
},
{
"epoch": 7.21,
"learning_rate": 0.0008442391583269181,
"loss": 0.0263,
"step": 18720
},
{
"epoch": 7.36,
"learning_rate": 0.0008386793259772475,
"loss": 0.0269,
"step": 19110
},
{
"epoch": 7.51,
"learning_rate": 0.0008331194936275768,
"loss": 0.0268,
"step": 19500
},
{
"epoch": 7.66,
"learning_rate": 0.0008275596612779061,
"loss": 0.0283,
"step": 19890
},
{
"epoch": 7.81,
"learning_rate": 0.0008219998289282354,
"loss": 0.0274,
"step": 20280
},
{
"epoch": 7.96,
"learning_rate": 0.0008164399965785648,
"loss": 0.0267,
"step": 20670
},
{
"epoch": 8.0,
"eval_loss": 0.07682657241821289,
"eval_max_distance": 16,
"eval_mean_distance": 1,
"eval_runtime": 0.3687,
"eval_samples_per_second": 135.62,
"eval_steps_per_second": 5.425,
"step": 20784
},
{
"epoch": 8.11,
"learning_rate": 0.000810880164228894,
"loss": 0.0235,
"step": 21060
},
{
"epoch": 8.26,
"learning_rate": 0.0008053203318792234,
"loss": 0.0221,
"step": 21450
},
{
"epoch": 8.41,
"learning_rate": 0.0007997604995295527,
"loss": 0.0221,
"step": 21840
},
{
"epoch": 8.56,
"learning_rate": 0.0007942006671798819,
"loss": 0.0223,
"step": 22230
},
{
"epoch": 8.71,
"learning_rate": 0.0007886408348302113,
"loss": 0.0233,
"step": 22620
},
{
"epoch": 8.86,
"learning_rate": 0.0007830810024805405,
"loss": 0.0231,
"step": 23010
},
{
"epoch": 9.0,
"eval_loss": 0.06973634660243988,
"eval_max_distance": 10,
"eval_mean_distance": 1,
"eval_runtime": 0.3759,
"eval_samples_per_second": 133.026,
"eval_steps_per_second": 5.321,
"step": 23382
},
{
"epoch": 9.01,
"learning_rate": 0.0007775211701308699,
"loss": 0.0227,
"step": 23400
},
{
"epoch": 9.16,
"learning_rate": 0.0007719613377811992,
"loss": 0.0185,
"step": 23790
},
{
"epoch": 9.31,
"learning_rate": 0.0007664015054315285,
"loss": 0.0183,
"step": 24180
},
{
"epoch": 9.46,
"learning_rate": 0.0007608416730818578,
"loss": 0.0191,
"step": 24570
},
{
"epoch": 9.61,
"learning_rate": 0.0007552818407321872,
"loss": 0.019,
"step": 24960
},
{
"epoch": 9.76,
"learning_rate": 0.0007497220083825164,
"loss": 0.0193,
"step": 25350
},
{
"epoch": 9.91,
"learning_rate": 0.0007441621760328458,
"loss": 0.0199,
"step": 25740
},
{
"epoch": 10.0,
"eval_loss": 0.07169829308986664,
"eval_max_distance": 9,
"eval_mean_distance": 1,
"eval_runtime": 0.3704,
"eval_samples_per_second": 134.993,
"eval_steps_per_second": 5.4,
"step": 25980
},
{
"epoch": 10.06,
"learning_rate": 0.0007386023436831751,
"loss": 0.0184,
"step": 26130
},
{
"epoch": 10.21,
"learning_rate": 0.0007330425113335044,
"loss": 0.016,
"step": 26520
},
{
"epoch": 10.36,
"learning_rate": 0.0007274826789838337,
"loss": 0.0164,
"step": 26910
},
{
"epoch": 10.51,
"learning_rate": 0.0007219228466341631,
"loss": 0.016,
"step": 27300
},
{
"epoch": 10.66,
"learning_rate": 0.0007163630142844923,
"loss": 0.0169,
"step": 27690
},
{
"epoch": 10.81,
"learning_rate": 0.0007108031819348217,
"loss": 0.0165,
"step": 28080
},
{
"epoch": 10.96,
"learning_rate": 0.000705243349585151,
"loss": 0.0168,
"step": 28470
},
{
"epoch": 11.0,
"eval_loss": 0.08123478293418884,
"eval_max_distance": 16,
"eval_mean_distance": 1,
"eval_runtime": 0.3865,
"eval_samples_per_second": 129.356,
"eval_steps_per_second": 5.174,
"step": 28578
},
{
"epoch": 11.11,
"learning_rate": 0.0006996835172354803,
"loss": 0.015,
"step": 28860
},
{
"epoch": 11.26,
"learning_rate": 0.0006941236848858096,
"loss": 0.0137,
"step": 29250
},
{
"epoch": 11.41,
"learning_rate": 0.0006885638525361389,
"loss": 0.0151,
"step": 29640
},
{
"epoch": 11.56,
"learning_rate": 0.0006830040201864682,
"loss": 0.0144,
"step": 30030
},
{
"epoch": 11.71,
"learning_rate": 0.0006774441878367975,
"loss": 0.0147,
"step": 30420
},
{
"epoch": 11.86,
"learning_rate": 0.0006718843554871269,
"loss": 0.0148,
"step": 30810
},
{
"epoch": 12.0,
"eval_loss": 0.09610763192176819,
"eval_max_distance": 12,
"eval_mean_distance": 1,
"eval_runtime": 0.3633,
"eval_samples_per_second": 137.639,
"eval_steps_per_second": 5.506,
"step": 31176
},
{
"epoch": 12.01,
"learning_rate": 0.0006663245231374561,
"loss": 0.0145,
"step": 31200
},
{
"epoch": 12.16,
"learning_rate": 0.0006607646907877855,
"loss": 0.0124,
"step": 31590
},
{
"epoch": 12.31,
"learning_rate": 0.0006552048584381148,
"loss": 0.0117,
"step": 31980
},
{
"epoch": 12.46,
"learning_rate": 0.0006496450260884441,
"loss": 0.0121,
"step": 32370
},
{
"epoch": 12.61,
"learning_rate": 0.0006440851937387734,
"loss": 0.0124,
"step": 32760
},
{
"epoch": 12.76,
"learning_rate": 0.0006385253613891028,
"loss": 0.0125,
"step": 33150
},
{
"epoch": 12.91,
"learning_rate": 0.000632965529039432,
"loss": 0.0128,
"step": 33540
},
{
"epoch": 13.0,
"eval_loss": 0.08225859701633453,
"eval_max_distance": 9,
"eval_mean_distance": 1,
"eval_runtime": 0.3712,
"eval_samples_per_second": 134.695,
"eval_steps_per_second": 5.388,
"step": 33774
},
{
"epoch": 13.06,
"learning_rate": 0.0006274056966897614,
"loss": 0.0116,
"step": 33930
},
{
"epoch": 13.21,
"learning_rate": 0.0006218458643400907,
"loss": 0.0106,
"step": 34320
},
{
"epoch": 13.36,
"learning_rate": 0.00061628603199042,
"loss": 0.0104,
"step": 34710
},
{
"epoch": 13.51,
"learning_rate": 0.0006107261996407493,
"loss": 0.011,
"step": 35100
},
{
"epoch": 13.66,
"learning_rate": 0.0006051663672910787,
"loss": 0.0108,
"step": 35490
},
{
"epoch": 13.81,
"learning_rate": 0.0005996065349414079,
"loss": 0.0111,
"step": 35880
},
{
"epoch": 13.96,
"learning_rate": 0.0005940467025917372,
"loss": 0.0112,
"step": 36270
},
{
"epoch": 14.0,
"eval_loss": 0.07655028253793716,
"eval_max_distance": 12,
"eval_mean_distance": 1,
"eval_runtime": 0.361,
"eval_samples_per_second": 138.506,
"eval_steps_per_second": 5.54,
"step": 36372
},
{
"epoch": 14.11,
"learning_rate": 0.0005884868702420666,
"loss": 0.0098,
"step": 36660
},
{
"epoch": 14.26,
"learning_rate": 0.0005829270378923958,
"loss": 0.009,
"step": 37050
},
{
"epoch": 14.41,
"learning_rate": 0.0005773672055427252,
"loss": 0.0093,
"step": 37440
},
{
"epoch": 14.56,
"learning_rate": 0.0005718073731930545,
"loss": 0.0095,
"step": 37830
},
{
"epoch": 14.71,
"learning_rate": 0.0005662475408433838,
"loss": 0.0093,
"step": 38220
},
{
"epoch": 14.86,
"learning_rate": 0.0005606877084937131,
"loss": 0.0093,
"step": 38610
},
{
"epoch": 15.0,
"eval_loss": 0.07127052545547485,
"eval_max_distance": 9,
"eval_mean_distance": 1,
"eval_runtime": 0.3671,
"eval_samples_per_second": 136.219,
"eval_steps_per_second": 5.449,
"step": 38970
},
{
"epoch": 15.01,
"learning_rate": 0.0005551278761440425,
"loss": 0.0101,
"step": 39000
},
{
"epoch": 15.16,
"learning_rate": 0.0005495680437943717,
"loss": 0.0078,
"step": 39390
},
{
"epoch": 15.31,
"learning_rate": 0.0005440082114447011,
"loss": 0.0079,
"step": 39780
},
{
"epoch": 15.46,
"learning_rate": 0.0005384483790950304,
"loss": 0.0081,
"step": 40170
},
{
"epoch": 15.61,
"learning_rate": 0.0005328885467453597,
"loss": 0.0085,
"step": 40560
},
{
"epoch": 15.76,
"learning_rate": 0.000527328714395689,
"loss": 0.0088,
"step": 40950
},
{
"epoch": 15.91,
"learning_rate": 0.0005217688820460184,
"loss": 0.0083,
"step": 41340
},
{
"epoch": 16.0,
"eval_loss": 0.08469703793525696,
"eval_max_distance": 14,
"eval_mean_distance": 1,
"eval_runtime": 0.3815,
"eval_samples_per_second": 131.073,
"eval_steps_per_second": 5.243,
"step": 41568
},
{
"epoch": 16.06,
"learning_rate": 0.0005162090496963476,
"loss": 0.0081,
"step": 41730
},
{
"epoch": 16.21,
"learning_rate": 0.000510649217346677,
"loss": 0.0069,
"step": 42120
},
{
"epoch": 16.36,
"learning_rate": 0.0005050893849970063,
"loss": 0.007,
"step": 42510
},
{
"epoch": 16.51,
"learning_rate": 0.0004995295526473355,
"loss": 0.0071,
"step": 42900
},
{
"epoch": 16.66,
"learning_rate": 0.0004939697202976649,
"loss": 0.0073,
"step": 43290
},
{
"epoch": 16.81,
"learning_rate": 0.0004884098879479942,
"loss": 0.0076,
"step": 43680
},
{
"epoch": 16.96,
"learning_rate": 0.0004828500555983235,
"loss": 0.0076,
"step": 44070
},
{
"epoch": 17.0,
"eval_loss": 0.08625645935535431,
"eval_max_distance": 11,
"eval_mean_distance": 1,
"eval_runtime": 0.3551,
"eval_samples_per_second": 140.8,
"eval_steps_per_second": 5.632,
"step": 44166
},
{
"epoch": 17.11,
"learning_rate": 0.00047729022324865286,
"loss": 0.0064,
"step": 44460
},
{
"epoch": 17.26,
"learning_rate": 0.00047173039089898214,
"loss": 0.0059,
"step": 44850
},
{
"epoch": 17.41,
"learning_rate": 0.0004661705585493115,
"loss": 0.0064,
"step": 45240
},
{
"epoch": 17.56,
"learning_rate": 0.0004606107261996408,
"loss": 0.0068,
"step": 45630
},
{
"epoch": 17.71,
"learning_rate": 0.00045505089384997004,
"loss": 0.0066,
"step": 46020
},
{
"epoch": 17.86,
"learning_rate": 0.00044949106150029937,
"loss": 0.0064,
"step": 46410
},
{
"epoch": 18.0,
"eval_loss": 0.08296500891447067,
"eval_max_distance": 14,
"eval_mean_distance": 1,
"eval_runtime": 0.3721,
"eval_samples_per_second": 134.372,
"eval_steps_per_second": 5.375,
"step": 46764
},
{
"epoch": 18.01,
"learning_rate": 0.0004439312291506287,
"loss": 0.0065,
"step": 46800
},
{
"epoch": 18.16,
"learning_rate": 0.000438371396800958,
"loss": 0.0055,
"step": 47190
},
{
"epoch": 18.31,
"learning_rate": 0.0004328115644512873,
"loss": 0.0052,
"step": 47580
},
{
"epoch": 18.46,
"learning_rate": 0.00042725173210161665,
"loss": 0.0057,
"step": 47970
},
{
"epoch": 18.61,
"learning_rate": 0.00042169189975194593,
"loss": 0.006,
"step": 48360
},
{
"epoch": 18.76,
"learning_rate": 0.00041613206740227527,
"loss": 0.0055,
"step": 48750
},
{
"epoch": 18.91,
"learning_rate": 0.0004105722350526046,
"loss": 0.0054,
"step": 49140
},
{
"epoch": 19.0,
"eval_loss": 0.08839410543441772,
"eval_max_distance": 11,
"eval_mean_distance": 1,
"eval_runtime": 0.367,
"eval_samples_per_second": 136.245,
"eval_steps_per_second": 5.45,
"step": 49362
},
{
"epoch": 19.06,
"learning_rate": 0.0004050124027029339,
"loss": 0.0057,
"step": 49530
},
{
"epoch": 19.21,
"learning_rate": 0.0003994525703532632,
"loss": 0.0047,
"step": 49920
},
{
"epoch": 19.36,
"learning_rate": 0.0003938927380035925,
"loss": 0.0048,
"step": 50310
},
{
"epoch": 19.52,
"learning_rate": 0.00038833290565392183,
"loss": 0.0052,
"step": 50700
},
{
"epoch": 19.67,
"learning_rate": 0.00038277307330425117,
"loss": 0.005,
"step": 51090
},
{
"epoch": 19.82,
"learning_rate": 0.00037721324095458045,
"loss": 0.0048,
"step": 51480
},
{
"epoch": 19.97,
"learning_rate": 0.0003716534086049098,
"loss": 0.0052,
"step": 51870
},
{
"epoch": 20.0,
"eval_loss": 0.08214738219976425,
"eval_max_distance": 10,
"eval_mean_distance": 1,
"eval_runtime": 0.3692,
"eval_samples_per_second": 135.434,
"eval_steps_per_second": 5.417,
"step": 51960
},
{
"epoch": 20.12,
"learning_rate": 0.0003660935762552391,
"loss": 0.0049,
"step": 52260
},
{
"epoch": 20.27,
"learning_rate": 0.00036053374390556834,
"loss": 0.0043,
"step": 52650
},
{
"epoch": 20.42,
"learning_rate": 0.0003549739115558977,
"loss": 0.0043,
"step": 53040
},
{
"epoch": 20.57,
"learning_rate": 0.000349414079206227,
"loss": 0.0044,
"step": 53430
},
{
"epoch": 20.72,
"learning_rate": 0.0003438542468565563,
"loss": 0.0044,
"step": 53820
},
{
"epoch": 20.87,
"learning_rate": 0.0003382944145068856,
"loss": 0.0045,
"step": 54210
},
{
"epoch": 21.0,
"eval_loss": 0.0914614275097847,
"eval_max_distance": 14,
"eval_mean_distance": 1,
"eval_runtime": 0.3653,
"eval_samples_per_second": 136.874,
"eval_steps_per_second": 5.475,
"step": 54558
},
{
"epoch": 21.02,
"learning_rate": 0.00033273458215721496,
"loss": 0.0041,
"step": 54600
},
{
"epoch": 21.17,
"learning_rate": 0.00032717474980754424,
"loss": 0.0035,
"step": 54990
},
{
"epoch": 21.32,
"learning_rate": 0.0003216149174578736,
"loss": 0.0038,
"step": 55380
},
{
"epoch": 21.47,
"learning_rate": 0.0003160550851082029,
"loss": 0.0038,
"step": 55770
},
{
"epoch": 21.62,
"learning_rate": 0.0003104952527585322,
"loss": 0.0041,
"step": 56160
},
{
"epoch": 21.77,
"learning_rate": 0.0003049354204088615,
"loss": 0.004,
"step": 56550
},
{
"epoch": 21.92,
"learning_rate": 0.00029937558805919086,
"loss": 0.0037,
"step": 56940
},
{
"epoch": 22.0,
"eval_loss": 0.09314610809087753,
"eval_max_distance": 14,
"eval_mean_distance": 1,
"eval_runtime": 0.3634,
"eval_samples_per_second": 137.604,
"eval_steps_per_second": 5.504,
"step": 57156
},
{
"epoch": 22.07,
"learning_rate": 0.00029381575570952014,
"loss": 0.0037,
"step": 57330
},
{
"epoch": 22.22,
"learning_rate": 0.0002882559233598495,
"loss": 0.0033,
"step": 57720
},
{
"epoch": 22.37,
"learning_rate": 0.0002826960910101788,
"loss": 0.0034,
"step": 58110
},
{
"epoch": 22.52,
"learning_rate": 0.0002771362586605081,
"loss": 0.0034,
"step": 58500
},
{
"epoch": 22.67,
"learning_rate": 0.0002715764263108374,
"loss": 0.0035,
"step": 58890
},
{
"epoch": 22.82,
"learning_rate": 0.0002660165939611667,
"loss": 0.0034,
"step": 59280
},
{
"epoch": 22.97,
"learning_rate": 0.000260456761611496,
"loss": 0.0036,
"step": 59670
},
{
"epoch": 23.0,
"eval_loss": 0.09405915439128876,
"eval_max_distance": 9,
"eval_mean_distance": 1,
"eval_runtime": 0.3715,
"eval_samples_per_second": 134.573,
"eval_steps_per_second": 5.383,
"step": 59754
},
{
"epoch": 23.12,
"learning_rate": 0.0002548969292618253,
"loss": 0.003,
"step": 60060
},
{
"epoch": 23.27,
"learning_rate": 0.00024933709691215465,
"loss": 0.0031,
"step": 60450
},
{
"epoch": 23.42,
"learning_rate": 0.00024377726456248396,
"loss": 0.003,
"step": 60840
},
{
"epoch": 23.57,
"learning_rate": 0.00023821743221281327,
"loss": 0.0029,
"step": 61230
},
{
"epoch": 23.72,
"learning_rate": 0.00023265759986314258,
"loss": 0.0028,
"step": 61620
},
{
"epoch": 23.87,
"learning_rate": 0.0002270977675134719,
"loss": 0.0028,
"step": 62010
},
{
"epoch": 24.0,
"eval_loss": 0.08611776679754257,
"eval_max_distance": 13,
"eval_mean_distance": 1,
"eval_runtime": 0.3594,
"eval_samples_per_second": 139.139,
"eval_steps_per_second": 5.566,
"step": 62352
},
{
"epoch": 24.02,
"learning_rate": 0.00022153793516380122,
"loss": 0.0027,
"step": 62400
},
{
"epoch": 24.17,
"learning_rate": 0.00021597810281413053,
"loss": 0.0026,
"step": 62790
},
{
"epoch": 24.32,
"learning_rate": 0.00021041827046445986,
"loss": 0.0027,
"step": 63180
},
{
"epoch": 24.47,
"learning_rate": 0.00020485843811478917,
"loss": 0.0027,
"step": 63570
},
{
"epoch": 24.62,
"learning_rate": 0.00019929860576511847,
"loss": 0.0027,
"step": 63960
},
{
"epoch": 24.77,
"learning_rate": 0.00019373877341544778,
"loss": 0.0024,
"step": 64350
},
{
"epoch": 24.92,
"learning_rate": 0.0001881789410657771,
"loss": 0.0026,
"step": 64740
},
{
"epoch": 25.0,
"eval_loss": 0.09115344285964966,
"eval_max_distance": 12,
"eval_mean_distance": 1,
"eval_runtime": 0.3622,
"eval_samples_per_second": 138.045,
"eval_steps_per_second": 5.522,
"step": 64950
},
{
"epoch": 25.07,
"learning_rate": 0.0001826191087161064,
"loss": 0.0026,
"step": 65130
},
{
"epoch": 25.22,
"learning_rate": 0.00017705927636643573,
"loss": 0.0023,
"step": 65520
},
{
"epoch": 25.37,
"learning_rate": 0.00017149944401676504,
"loss": 0.0023,
"step": 65910
},
{
"epoch": 25.52,
"learning_rate": 0.00016593961166709435,
"loss": 0.0021,
"step": 66300
},
{
"epoch": 25.67,
"learning_rate": 0.00016037977931742368,
"loss": 0.0021,
"step": 66690
},
{
"epoch": 25.82,
"learning_rate": 0.000154819946967753,
"loss": 0.0024,
"step": 67080
},
{
"epoch": 25.97,
"learning_rate": 0.0001492601146180823,
"loss": 0.0024,
"step": 67470
},
{
"epoch": 26.0,
"eval_loss": 0.09158334881067276,
"eval_max_distance": 9,
"eval_mean_distance": 0,
"eval_runtime": 0.3618,
"eval_samples_per_second": 138.208,
"eval_steps_per_second": 5.528,
"step": 67548
},
{
"epoch": 26.12,
"learning_rate": 0.0001437002822684116,
"loss": 0.0021,
"step": 67860
},
{
"epoch": 26.27,
"learning_rate": 0.0001381404499187409,
"loss": 0.0019,
"step": 68250
},
{
"epoch": 26.42,
"learning_rate": 0.00013258061756907022,
"loss": 0.002,
"step": 68640
},
{
"epoch": 26.57,
"learning_rate": 0.00012702078521939955,
"loss": 0.0019,
"step": 69030
},
{
"epoch": 26.72,
"learning_rate": 0.00012146095286972886,
"loss": 0.0021,
"step": 69420
},
{
"epoch": 26.87,
"learning_rate": 0.00011590112052005817,
"loss": 0.002,
"step": 69810
},
{
"epoch": 27.0,
"eval_loss": 0.08878373354673386,
"eval_max_distance": 9,
"eval_mean_distance": 0,
"eval_runtime": 0.3454,
"eval_samples_per_second": 144.754,
"eval_steps_per_second": 5.79,
"step": 70146
},
{
"epoch": 27.02,
"learning_rate": 0.00011034128817038747,
"loss": 0.0021,
"step": 70200
},
{
"epoch": 27.17,
"learning_rate": 0.0001047814558207168,
"loss": 0.0017,
"step": 70590
},
{
"epoch": 27.32,
"learning_rate": 9.92216234710461e-05,
"loss": 0.0018,
"step": 70980
},
{
"epoch": 27.47,
"learning_rate": 9.366179112137542e-05,
"loss": 0.0017,
"step": 71370
},
{
"epoch": 27.62,
"learning_rate": 8.810195877170473e-05,
"loss": 0.0016,
"step": 71760
},
{
"epoch": 27.77,
"learning_rate": 8.254212642203404e-05,
"loss": 0.002,
"step": 72150
},
{
"epoch": 27.92,
"learning_rate": 7.698229407236336e-05,
"loss": 0.0017,
"step": 72540
},
{
"epoch": 28.0,
"eval_loss": 0.08879587054252625,
"eval_max_distance": 9,
"eval_mean_distance": 0,
"eval_runtime": 0.3476,
"eval_samples_per_second": 143.846,
"eval_steps_per_second": 5.754,
"step": 72744
},
{
"epoch": 28.07,
"learning_rate": 7.142246172269268e-05,
"loss": 0.0016,
"step": 72930
},
{
"epoch": 28.22,
"learning_rate": 6.586262937302199e-05,
"loss": 0.0016,
"step": 73320
},
{
"epoch": 28.37,
"learning_rate": 6.03027970233513e-05,
"loss": 0.0016,
"step": 73710
},
{
"epoch": 28.52,
"learning_rate": 5.474296467368061e-05,
"loss": 0.0016,
"step": 74100
},
{
"epoch": 28.67,
"learning_rate": 4.9183132324009924e-05,
"loss": 0.0016,
"step": 74490
},
{
"epoch": 28.82,
"learning_rate": 4.362329997433924e-05,
"loss": 0.0017,
"step": 74880
},
{
"epoch": 28.97,
"learning_rate": 3.806346762466855e-05,
"loss": 0.0017,
"step": 75270
},
{
"epoch": 29.0,
"eval_loss": 0.09515639394521713,
"eval_max_distance": 9,
"eval_mean_distance": 0,
"eval_runtime": 0.343,
"eval_samples_per_second": 145.752,
"eval_steps_per_second": 5.83,
"step": 75342
},
{
"epoch": 29.12,
"learning_rate": 3.250363527499786e-05,
"loss": 0.0015,
"step": 75660
},
{
"epoch": 29.27,
"learning_rate": 2.6943802925327177e-05,
"loss": 0.0016,
"step": 76050
},
{
"epoch": 29.42,
"learning_rate": 2.1383970575656488e-05,
"loss": 0.0014,
"step": 76440
},
{
"epoch": 29.57,
"learning_rate": 1.5824138225985802e-05,
"loss": 0.0013,
"step": 76830
},
{
"epoch": 29.72,
"learning_rate": 1.0264305876315115e-05,
"loss": 0.0014,
"step": 77220
},
{
"epoch": 29.87,
"learning_rate": 4.704473526644427e-06,
"loss": 0.0014,
"step": 77610
},
{
"epoch": 30.0,
"eval_loss": 0.09847646951675415,
"eval_max_distance": 9,
"eval_mean_distance": 0,
"eval_runtime": 0.3435,
"eval_samples_per_second": 145.564,
"eval_steps_per_second": 5.823,
"step": 77940
},
{
"epoch": 30.0,
"step": 77940,
"total_flos": 4.517674593940685e+16,
"train_loss": 0.053724035134690526,
"train_runtime": 6582.4117,
"train_samples_per_second": 355.137,
"train_steps_per_second": 11.841
}
],
"logging_steps": 390,
"max_steps": 77940,
"num_train_epochs": 30,
"save_steps": 780,
"total_flos": 4.517674593940685e+16,
"trial_name": null,
"trial_params": null
}