text-normalization-ru-new / trainer_state.json
alexue4's picture
End of training
e06c4d5
raw
history blame
43.4 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"eval_steps": 500,
"global_step": 461010,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.8920122621319915e-08,
"loss": 14.3504,
"step": 1
},
{
"epoch": 0.15,
"learning_rate": 5.000289201226213e-05,
"loss": 2.1001,
"step": 1729
},
{
"epoch": 0.3,
"learning_rate": 0.00010000578402452426,
"loss": 0.3756,
"step": 3458
},
{
"epoch": 0.45,
"learning_rate": 0.0001500086760367864,
"loss": 0.2527,
"step": 5187
},
{
"epoch": 0.6,
"learning_rate": 0.00020001156804904852,
"loss": 0.2076,
"step": 6916
},
{
"epoch": 0.75,
"learning_rate": 0.00025001446006131067,
"loss": 0.1864,
"step": 8645
},
{
"epoch": 0.9,
"learning_rate": 0.0003000173520735728,
"loss": 0.199,
"step": 10374
},
{
"epoch": 1.0,
"eval_loss": 0.8173184990882874,
"eval_max_distance": 167,
"eval_mean_distance": 17,
"eval_runtime": 64.0638,
"eval_samples_per_second": 15.609,
"eval_steps_per_second": 0.78,
"step": 11526
},
{
"epoch": 1.05,
"learning_rate": 0.0003500202440858349,
"loss": 0.2481,
"step": 12103
},
{
"epoch": 1.2,
"learning_rate": 0.00040002313609809704,
"loss": 0.1244,
"step": 13832
},
{
"epoch": 1.35,
"learning_rate": 0.0004500260281103592,
"loss": 0.1055,
"step": 15561
},
{
"epoch": 1.5,
"learning_rate": 0.0005000289201226213,
"loss": 0.102,
"step": 17290
},
{
"epoch": 1.65,
"learning_rate": 0.0005500318121348835,
"loss": 0.102,
"step": 19019
},
{
"epoch": 1.8,
"learning_rate": 0.0006000347041471456,
"loss": 0.1083,
"step": 20748
},
{
"epoch": 1.95,
"learning_rate": 0.0006500375961594078,
"loss": 0.1286,
"step": 22477
},
{
"epoch": 2.0,
"eval_loss": 0.5452634692192078,
"eval_max_distance": 158,
"eval_mean_distance": 14,
"eval_runtime": 30.4268,
"eval_samples_per_second": 32.866,
"eval_steps_per_second": 1.643,
"step": 23052
},
{
"epoch": 2.1,
"learning_rate": 0.0007000404881716698,
"loss": 0.1449,
"step": 24206
},
{
"epoch": 2.25,
"learning_rate": 0.000750043380183932,
"loss": 0.0747,
"step": 25935
},
{
"epoch": 2.4,
"learning_rate": 0.0008000462721961941,
"loss": 0.0744,
"step": 27664
},
{
"epoch": 2.55,
"learning_rate": 0.0008500491642084563,
"loss": 0.0742,
"step": 29393
},
{
"epoch": 2.7,
"learning_rate": 0.0009000520562207184,
"loss": 0.0792,
"step": 31122
},
{
"epoch": 2.85,
"learning_rate": 0.0009500549482329805,
"loss": 0.0891,
"step": 32851
},
{
"epoch": 3.0,
"eval_loss": 0.3629104495048523,
"eval_max_distance": 122,
"eval_mean_distance": 10,
"eval_runtime": 35.1149,
"eval_samples_per_second": 28.478,
"eval_steps_per_second": 1.424,
"step": 34578
},
{
"epoch": 3.0,
"learning_rate": 0.0009999935733060843,
"loss": 0.1079,
"step": 34580
},
{
"epoch": 3.15,
"learning_rate": 0.000994437696415833,
"loss": 0.0962,
"step": 36309
},
{
"epoch": 3.3,
"learning_rate": 0.0009888818195255813,
"loss": 0.059,
"step": 38038
},
{
"epoch": 3.45,
"learning_rate": 0.0009833259426353302,
"loss": 0.0576,
"step": 39767
},
{
"epoch": 3.6,
"learning_rate": 0.0009777700657450789,
"loss": 0.058,
"step": 41496
},
{
"epoch": 3.75,
"learning_rate": 0.0009722141888548275,
"loss": 0.0611,
"step": 43225
},
{
"epoch": 3.9,
"learning_rate": 0.0009666583119645761,
"loss": 0.0711,
"step": 44954
},
{
"epoch": 4.0,
"eval_loss": 0.4011004865169525,
"eval_max_distance": 114,
"eval_mean_distance": 12,
"eval_runtime": 24.9859,
"eval_samples_per_second": 40.022,
"eval_steps_per_second": 2.001,
"step": 46104
},
{
"epoch": 4.05,
"learning_rate": 0.0009611024350743247,
"loss": 0.0991,
"step": 46683
},
{
"epoch": 4.2,
"learning_rate": 0.0009555465581840734,
"loss": 0.0481,
"step": 48412
},
{
"epoch": 4.35,
"learning_rate": 0.0009499906812938221,
"loss": 0.044,
"step": 50141
},
{
"epoch": 4.5,
"learning_rate": 0.0009444348044035706,
"loss": 0.0435,
"step": 51870
},
{
"epoch": 4.65,
"learning_rate": 0.0009388789275133194,
"loss": 0.0454,
"step": 53599
},
{
"epoch": 4.8,
"learning_rate": 0.000933323050623068,
"loss": 0.0483,
"step": 55328
},
{
"epoch": 4.95,
"learning_rate": 0.0009277671737328166,
"loss": 0.0566,
"step": 57057
},
{
"epoch": 5.0,
"eval_loss": 0.29974234104156494,
"eval_max_distance": 100,
"eval_mean_distance": 7,
"eval_runtime": 25.609,
"eval_samples_per_second": 39.049,
"eval_steps_per_second": 1.952,
"step": 57630
},
{
"epoch": 5.1,
"learning_rate": 0.0009222112968425653,
"loss": 0.0724,
"step": 58786
},
{
"epoch": 5.25,
"learning_rate": 0.000916655419952314,
"loss": 0.0364,
"step": 60515
},
{
"epoch": 5.4,
"learning_rate": 0.0009110995430620625,
"loss": 0.035,
"step": 62244
},
{
"epoch": 5.55,
"learning_rate": 0.0009055436661718113,
"loss": 0.0353,
"step": 63973
},
{
"epoch": 5.7,
"learning_rate": 0.0008999877892815599,
"loss": 0.0364,
"step": 65702
},
{
"epoch": 5.85,
"learning_rate": 0.0008944319123913086,
"loss": 0.0402,
"step": 67431
},
{
"epoch": 6.0,
"eval_loss": 0.15522713959217072,
"eval_max_distance": 75,
"eval_mean_distance": 4,
"eval_runtime": 24.8618,
"eval_samples_per_second": 40.222,
"eval_steps_per_second": 2.011,
"step": 69156
},
{
"epoch": 6.0,
"learning_rate": 0.0008888760355010572,
"loss": 0.0506,
"step": 69160
},
{
"epoch": 6.15,
"learning_rate": 0.0008833201586108059,
"loss": 0.0514,
"step": 70889
},
{
"epoch": 6.3,
"learning_rate": 0.0008777642817205546,
"loss": 0.0298,
"step": 72618
},
{
"epoch": 6.45,
"learning_rate": 0.0008722084048303031,
"loss": 0.0287,
"step": 74347
},
{
"epoch": 6.6,
"learning_rate": 0.0008666525279400518,
"loss": 0.0289,
"step": 76076
},
{
"epoch": 6.75,
"learning_rate": 0.0008610966510498005,
"loss": 0.0303,
"step": 77805
},
{
"epoch": 6.9,
"learning_rate": 0.0008555407741595491,
"loss": 0.0348,
"step": 79534
},
{
"epoch": 7.0,
"eval_loss": 0.15130603313446045,
"eval_max_distance": 79,
"eval_mean_distance": 3,
"eval_runtime": 25.5455,
"eval_samples_per_second": 39.146,
"eval_steps_per_second": 1.957,
"step": 80682
},
{
"epoch": 7.05,
"learning_rate": 0.0008499848972692977,
"loss": 0.0527,
"step": 81263
},
{
"epoch": 7.2,
"learning_rate": 0.0008444290203790465,
"loss": 0.0293,
"step": 82992
},
{
"epoch": 7.35,
"learning_rate": 0.000838873143488795,
"loss": 0.0247,
"step": 84721
},
{
"epoch": 7.5,
"learning_rate": 0.0008333172665985437,
"loss": 0.024,
"step": 86450
},
{
"epoch": 7.65,
"learning_rate": 0.0008277613897082924,
"loss": 0.0243,
"step": 88179
},
{
"epoch": 7.8,
"learning_rate": 0.000822205512818041,
"loss": 0.0263,
"step": 89908
},
{
"epoch": 7.95,
"learning_rate": 0.0008166496359277897,
"loss": 0.0302,
"step": 91637
},
{
"epoch": 8.0,
"eval_loss": 0.14522188901901245,
"eval_max_distance": 76,
"eval_mean_distance": 3,
"eval_runtime": 25.6271,
"eval_samples_per_second": 39.021,
"eval_steps_per_second": 1.951,
"step": 92208
},
{
"epoch": 8.1,
"learning_rate": 0.0008110937590375384,
"loss": 0.0445,
"step": 93366
},
{
"epoch": 8.25,
"learning_rate": 0.0008055378821472869,
"loss": 0.0229,
"step": 95095
},
{
"epoch": 8.4,
"learning_rate": 0.0007999820052570357,
"loss": 0.0207,
"step": 96824
},
{
"epoch": 8.55,
"learning_rate": 0.0007944261283667843,
"loss": 0.0203,
"step": 98553
},
{
"epoch": 8.7,
"learning_rate": 0.000788870251476533,
"loss": 0.021,
"step": 100282
},
{
"epoch": 8.85,
"learning_rate": 0.0007833143745862816,
"loss": 0.0223,
"step": 102011
},
{
"epoch": 9.0,
"eval_loss": 0.08658243715763092,
"eval_max_distance": 76,
"eval_mean_distance": 1,
"eval_runtime": 23.498,
"eval_samples_per_second": 42.557,
"eval_steps_per_second": 2.128,
"step": 103734
},
{
"epoch": 9.0,
"learning_rate": 0.0007777584976960303,
"loss": 0.0284,
"step": 103740
},
{
"epoch": 9.15,
"learning_rate": 0.0007722026208057789,
"loss": 0.034,
"step": 105469
},
{
"epoch": 9.3,
"learning_rate": 0.0007666467439155275,
"loss": 0.0193,
"step": 107198
},
{
"epoch": 9.45,
"learning_rate": 0.0007610908670252762,
"loss": 0.0175,
"step": 108927
},
{
"epoch": 9.6,
"learning_rate": 0.0007555349901350248,
"loss": 0.0172,
"step": 110656
},
{
"epoch": 9.75,
"learning_rate": 0.0007499791132447735,
"loss": 0.018,
"step": 112385
},
{
"epoch": 9.9,
"learning_rate": 0.0007444232363545221,
"loss": 0.0202,
"step": 114114
},
{
"epoch": 10.0,
"eval_loss": 0.10908353328704834,
"eval_max_distance": 71,
"eval_mean_distance": 2,
"eval_runtime": 25.0909,
"eval_samples_per_second": 39.855,
"eval_steps_per_second": 1.993,
"step": 115260
},
{
"epoch": 10.05,
"learning_rate": 0.0007388673594642709,
"loss": 0.0342,
"step": 115843
},
{
"epoch": 10.2,
"learning_rate": 0.0007333114825740194,
"loss": 0.0202,
"step": 117572
},
{
"epoch": 10.35,
"learning_rate": 0.0007277556056837681,
"loss": 0.0159,
"step": 119301
},
{
"epoch": 10.5,
"learning_rate": 0.0007221997287935168,
"loss": 0.0149,
"step": 121030
},
{
"epoch": 10.65,
"learning_rate": 0.0007166438519032654,
"loss": 0.015,
"step": 122759
},
{
"epoch": 10.8,
"learning_rate": 0.000711087975013014,
"loss": 0.0155,
"step": 124488
},
{
"epoch": 10.95,
"learning_rate": 0.0007055320981227628,
"loss": 0.0175,
"step": 126217
},
{
"epoch": 11.0,
"eval_loss": 0.06553788483142853,
"eval_max_distance": 66,
"eval_mean_distance": 1,
"eval_runtime": 22.988,
"eval_samples_per_second": 43.501,
"eval_steps_per_second": 2.175,
"step": 126786
},
{
"epoch": 11.1,
"learning_rate": 0.0006999762212325114,
"loss": 0.0305,
"step": 127946
},
{
"epoch": 11.25,
"learning_rate": 0.00069442034434226,
"loss": 0.0157,
"step": 129675
},
{
"epoch": 11.4,
"learning_rate": 0.0006888644674520087,
"loss": 0.0134,
"step": 131404
},
{
"epoch": 11.55,
"learning_rate": 0.0006833085905617574,
"loss": 0.0124,
"step": 133133
},
{
"epoch": 11.7,
"learning_rate": 0.0006777527136715059,
"loss": 0.0131,
"step": 134862
},
{
"epoch": 11.85,
"learning_rate": 0.0006721968367812547,
"loss": 0.014,
"step": 136591
},
{
"epoch": 12.0,
"eval_loss": 0.04735955968499184,
"eval_max_distance": 44,
"eval_mean_distance": 0,
"eval_runtime": 22.5236,
"eval_samples_per_second": 44.398,
"eval_steps_per_second": 2.22,
"step": 138312
},
{
"epoch": 12.0,
"learning_rate": 0.0006666409598910033,
"loss": 0.0182,
"step": 138320
},
{
"epoch": 12.15,
"learning_rate": 0.000661085083000752,
"loss": 0.025,
"step": 140049
},
{
"epoch": 12.3,
"learning_rate": 0.0006555292061105006,
"loss": 0.0128,
"step": 141778
},
{
"epoch": 12.45,
"learning_rate": 0.0006499733292202492,
"loss": 0.0114,
"step": 143507
},
{
"epoch": 12.6,
"learning_rate": 0.0006444174523299979,
"loss": 0.011,
"step": 145236
},
{
"epoch": 12.75,
"learning_rate": 0.0006388615754397465,
"loss": 0.0117,
"step": 146965
},
{
"epoch": 12.9,
"learning_rate": 0.0006333056985494952,
"loss": 0.0122,
"step": 148694
},
{
"epoch": 13.0,
"eval_loss": 0.05152251571416855,
"eval_max_distance": 42,
"eval_mean_distance": 0,
"eval_runtime": 23.898,
"eval_samples_per_second": 41.845,
"eval_steps_per_second": 2.092,
"step": 149838
},
{
"epoch": 13.05,
"learning_rate": 0.0006277498216592438,
"loss": 0.0241,
"step": 150423
},
{
"epoch": 13.2,
"learning_rate": 0.0006221939447689925,
"loss": 0.0148,
"step": 152152
},
{
"epoch": 13.35,
"learning_rate": 0.0006166380678787411,
"loss": 0.0106,
"step": 153881
},
{
"epoch": 13.5,
"learning_rate": 0.0006110821909884899,
"loss": 0.0096,
"step": 155610
},
{
"epoch": 13.65,
"learning_rate": 0.0006055263140982384,
"loss": 0.0098,
"step": 157339
},
{
"epoch": 13.8,
"learning_rate": 0.0005999704372079872,
"loss": 0.0104,
"step": 159068
},
{
"epoch": 13.95,
"learning_rate": 0.0005944145603177358,
"loss": 0.0117,
"step": 160797
},
{
"epoch": 14.0,
"eval_loss": 0.047906968742609024,
"eval_max_distance": 30,
"eval_mean_distance": 0,
"eval_runtime": 23.3531,
"eval_samples_per_second": 42.821,
"eval_steps_per_second": 2.141,
"step": 161364
},
{
"epoch": 14.1,
"learning_rate": 0.0005888586834274844,
"loss": 0.0224,
"step": 162526
},
{
"epoch": 14.25,
"learning_rate": 0.0005833028065372331,
"loss": 0.0111,
"step": 164255
},
{
"epoch": 14.4,
"learning_rate": 0.0005777469296469818,
"loss": 0.009,
"step": 165984
},
{
"epoch": 14.55,
"learning_rate": 0.0005721910527567303,
"loss": 0.0086,
"step": 167713
},
{
"epoch": 14.7,
"learning_rate": 0.000566635175866479,
"loss": 0.0088,
"step": 169442
},
{
"epoch": 14.85,
"learning_rate": 0.0005610792989762277,
"loss": 0.0093,
"step": 171171
},
{
"epoch": 15.0,
"eval_loss": 0.05651288107037544,
"eval_max_distance": 56,
"eval_mean_distance": 0,
"eval_runtime": 23.6545,
"eval_samples_per_second": 42.275,
"eval_steps_per_second": 2.114,
"step": 172890
},
{
"epoch": 15.0,
"learning_rate": 0.0005555234220859762,
"loss": 0.0124,
"step": 172900
},
{
"epoch": 15.15,
"learning_rate": 0.000549967545195725,
"loss": 0.0181,
"step": 174629
},
{
"epoch": 15.3,
"learning_rate": 0.0005444116683054736,
"loss": 0.0091,
"step": 176358
},
{
"epoch": 15.45,
"learning_rate": 0.0005388557914152222,
"loss": 0.0075,
"step": 178087
},
{
"epoch": 15.6,
"learning_rate": 0.0005332999145249709,
"loss": 0.0075,
"step": 179816
},
{
"epoch": 15.75,
"learning_rate": 0.0005277440376347196,
"loss": 0.0075,
"step": 181545
},
{
"epoch": 15.9,
"learning_rate": 0.0005221881607444683,
"loss": 0.0085,
"step": 183274
},
{
"epoch": 16.0,
"eval_loss": 0.047154366970062256,
"eval_max_distance": 34,
"eval_mean_distance": 0,
"eval_runtime": 23.554,
"eval_samples_per_second": 42.456,
"eval_steps_per_second": 2.123,
"step": 184416
},
{
"epoch": 16.05,
"learning_rate": 0.0005166322838542169,
"loss": 0.0169,
"step": 185003
},
{
"epoch": 16.2,
"learning_rate": 0.0005110764069639655,
"loss": 0.0104,
"step": 186732
},
{
"epoch": 16.35,
"learning_rate": 0.0005055205300737143,
"loss": 0.0072,
"step": 188461
},
{
"epoch": 16.5,
"learning_rate": 0.0004999646531834628,
"loss": 0.0068,
"step": 190190
},
{
"epoch": 16.65,
"learning_rate": 0.0004944087762932115,
"loss": 0.0064,
"step": 191919
},
{
"epoch": 16.8,
"learning_rate": 0.0004888528994029601,
"loss": 0.0068,
"step": 193648
},
{
"epoch": 16.95,
"learning_rate": 0.0004832970225127088,
"loss": 0.0075,
"step": 195377
},
{
"epoch": 17.0,
"eval_loss": 0.04200902581214905,
"eval_max_distance": 28,
"eval_mean_distance": 0,
"eval_runtime": 22.1607,
"eval_samples_per_second": 45.125,
"eval_steps_per_second": 2.256,
"step": 195942
},
{
"epoch": 17.1,
"learning_rate": 0.00047774114562245746,
"loss": 0.0162,
"step": 197106
},
{
"epoch": 17.25,
"learning_rate": 0.00047218526873220605,
"loss": 0.0077,
"step": 198835
},
{
"epoch": 17.4,
"learning_rate": 0.00046662939184195475,
"loss": 0.0063,
"step": 200564
},
{
"epoch": 17.55,
"learning_rate": 0.0004610735149517034,
"loss": 0.0058,
"step": 202293
},
{
"epoch": 17.7,
"learning_rate": 0.0004555176380614521,
"loss": 0.0057,
"step": 204022
},
{
"epoch": 17.85,
"learning_rate": 0.0004499617611712007,
"loss": 0.0059,
"step": 205751
},
{
"epoch": 18.0,
"eval_loss": 0.04149915650486946,
"eval_max_distance": 32,
"eval_mean_distance": 0,
"eval_runtime": 22.9895,
"eval_samples_per_second": 43.498,
"eval_steps_per_second": 2.175,
"step": 207468
},
{
"epoch": 18.0,
"learning_rate": 0.00044440588428094934,
"loss": 0.0082,
"step": 207480
},
{
"epoch": 18.15,
"learning_rate": 0.00043885000739069804,
"loss": 0.0133,
"step": 209209
},
{
"epoch": 18.3,
"learning_rate": 0.00043329413050044663,
"loss": 0.0063,
"step": 210938
},
{
"epoch": 18.45,
"learning_rate": 0.0004277382536101953,
"loss": 0.0051,
"step": 212667
},
{
"epoch": 18.6,
"learning_rate": 0.000422182376719944,
"loss": 0.0051,
"step": 214396
},
{
"epoch": 18.75,
"learning_rate": 0.00041662649982969263,
"loss": 0.0051,
"step": 216125
},
{
"epoch": 18.9,
"learning_rate": 0.0004110706229394413,
"loss": 0.0054,
"step": 217854
},
{
"epoch": 19.0,
"eval_loss": 0.0405677855014801,
"eval_max_distance": 28,
"eval_mean_distance": 0,
"eval_runtime": 21.777,
"eval_samples_per_second": 45.92,
"eval_steps_per_second": 2.296,
"step": 218994
},
{
"epoch": 19.05,
"learning_rate": 0.0004055147460491899,
"loss": 0.0117,
"step": 219583
},
{
"epoch": 19.2,
"learning_rate": 0.00039995886915893857,
"loss": 0.0075,
"step": 221312
},
{
"epoch": 19.35,
"learning_rate": 0.00039440299226868727,
"loss": 0.0051,
"step": 223041
},
{
"epoch": 19.5,
"learning_rate": 0.00038884711537843586,
"loss": 0.0046,
"step": 224770
},
{
"epoch": 19.65,
"learning_rate": 0.0003832912384881845,
"loss": 0.0043,
"step": 226499
},
{
"epoch": 19.8,
"learning_rate": 0.0003777353615979332,
"loss": 0.0044,
"step": 228228
},
{
"epoch": 19.95,
"learning_rate": 0.0003721794847076818,
"loss": 0.0046,
"step": 229957
},
{
"epoch": 20.0,
"eval_loss": 0.03926468640565872,
"eval_max_distance": 24,
"eval_mean_distance": 0,
"eval_runtime": 23.1294,
"eval_samples_per_second": 43.235,
"eval_steps_per_second": 2.162,
"step": 230520
},
{
"epoch": 20.1,
"learning_rate": 0.0003666236078174305,
"loss": 0.0118,
"step": 231686
},
{
"epoch": 20.25,
"learning_rate": 0.00036106773092717915,
"loss": 0.0056,
"step": 233415
},
{
"epoch": 20.4,
"learning_rate": 0.0003555118540369278,
"loss": 0.0043,
"step": 235144
},
{
"epoch": 20.55,
"learning_rate": 0.00034995597714667645,
"loss": 0.0039,
"step": 236873
},
{
"epoch": 20.7,
"learning_rate": 0.0003444001002564251,
"loss": 0.0037,
"step": 238602
},
{
"epoch": 20.85,
"learning_rate": 0.00033884422336617374,
"loss": 0.004,
"step": 240331
},
{
"epoch": 21.0,
"eval_loss": 0.04168349876999855,
"eval_max_distance": 24,
"eval_mean_distance": 0,
"eval_runtime": 22.786,
"eval_samples_per_second": 43.887,
"eval_steps_per_second": 2.194,
"step": 242046
},
{
"epoch": 21.0,
"learning_rate": 0.0003332883464759224,
"loss": 0.0053,
"step": 242060
},
{
"epoch": 21.15,
"learning_rate": 0.00032773246958567103,
"loss": 0.0097,
"step": 243789
},
{
"epoch": 21.3,
"learning_rate": 0.00032217659269541973,
"loss": 0.0044,
"step": 245518
},
{
"epoch": 21.45,
"learning_rate": 0.0003166207158051684,
"loss": 0.0036,
"step": 247247
},
{
"epoch": 21.6,
"learning_rate": 0.000311064838914917,
"loss": 0.0031,
"step": 248976
},
{
"epoch": 21.75,
"learning_rate": 0.0003055089620246657,
"loss": 0.0031,
"step": 250705
},
{
"epoch": 21.9,
"learning_rate": 0.0002999530851344143,
"loss": 0.0034,
"step": 252434
},
{
"epoch": 22.0,
"eval_loss": 0.040287140756845474,
"eval_max_distance": 18,
"eval_mean_distance": 0,
"eval_runtime": 21.6664,
"eval_samples_per_second": 46.155,
"eval_steps_per_second": 2.308,
"step": 253572
},
{
"epoch": 22.05,
"learning_rate": 0.0002943972082441629,
"loss": 0.0082,
"step": 254163
},
{
"epoch": 22.2,
"learning_rate": 0.0002888413313539116,
"loss": 0.0053,
"step": 255892
},
{
"epoch": 22.35,
"learning_rate": 0.00028328545446366026,
"loss": 0.0034,
"step": 257621
},
{
"epoch": 22.5,
"learning_rate": 0.00027772957757340896,
"loss": 0.0031,
"step": 259350
},
{
"epoch": 22.65,
"learning_rate": 0.00027217370068315756,
"loss": 0.0029,
"step": 261079
},
{
"epoch": 22.8,
"learning_rate": 0.0002666178237929062,
"loss": 0.0027,
"step": 262808
},
{
"epoch": 22.95,
"learning_rate": 0.0002610619469026549,
"loss": 0.0029,
"step": 264537
},
{
"epoch": 23.0,
"eval_loss": 0.04222797229886055,
"eval_max_distance": 21,
"eval_mean_distance": 0,
"eval_runtime": 22.874,
"eval_samples_per_second": 43.718,
"eval_steps_per_second": 2.186,
"step": 265098
},
{
"epoch": 23.1,
"learning_rate": 0.0002555060700124035,
"loss": 0.0081,
"step": 266266
},
{
"epoch": 23.25,
"learning_rate": 0.00024995019312215214,
"loss": 0.0038,
"step": 267995
},
{
"epoch": 23.4,
"learning_rate": 0.00024439431623190085,
"loss": 0.0028,
"step": 269724
},
{
"epoch": 23.55,
"learning_rate": 0.00023883843934164947,
"loss": 0.0025,
"step": 271453
},
{
"epoch": 23.7,
"learning_rate": 0.00023328256245139814,
"loss": 0.0026,
"step": 273182
},
{
"epoch": 23.85,
"learning_rate": 0.0002277266855611468,
"loss": 0.0024,
"step": 274911
},
{
"epoch": 24.0,
"eval_loss": 0.04101773351430893,
"eval_max_distance": 21,
"eval_mean_distance": 0,
"eval_runtime": 22.8008,
"eval_samples_per_second": 43.858,
"eval_steps_per_second": 2.193,
"step": 276624
},
{
"epoch": 24.0,
"learning_rate": 0.00022217080867089543,
"loss": 0.0033,
"step": 276640
},
{
"epoch": 24.15,
"learning_rate": 0.00021661493178064408,
"loss": 0.0069,
"step": 278369
},
{
"epoch": 24.3,
"learning_rate": 0.00021105905489039273,
"loss": 0.0029,
"step": 280098
},
{
"epoch": 24.45,
"learning_rate": 0.0002055031780001414,
"loss": 0.0024,
"step": 281827
},
{
"epoch": 24.6,
"learning_rate": 0.00019994730110989005,
"loss": 0.0021,
"step": 283556
},
{
"epoch": 24.75,
"learning_rate": 0.0001943914242196387,
"loss": 0.002,
"step": 285285
},
{
"epoch": 24.9,
"learning_rate": 0.00018883554732938734,
"loss": 0.002,
"step": 287014
},
{
"epoch": 25.0,
"eval_loss": 0.043479613959789276,
"eval_max_distance": 15,
"eval_mean_distance": 0,
"eval_runtime": 21.481,
"eval_samples_per_second": 46.553,
"eval_steps_per_second": 2.328,
"step": 288150
},
{
"epoch": 25.05,
"learning_rate": 0.000183279670439136,
"loss": 0.0055,
"step": 288743
},
{
"epoch": 25.2,
"learning_rate": 0.00017772379354888466,
"loss": 0.0038,
"step": 290472
},
{
"epoch": 25.35,
"learning_rate": 0.00017216791665863328,
"loss": 0.0023,
"step": 292201
},
{
"epoch": 25.5,
"learning_rate": 0.00016661203976838196,
"loss": 0.002,
"step": 293930
},
{
"epoch": 25.65,
"learning_rate": 0.0001610561628781306,
"loss": 0.0017,
"step": 295659
},
{
"epoch": 25.8,
"learning_rate": 0.00015550028598787928,
"loss": 0.0018,
"step": 297388
},
{
"epoch": 25.95,
"learning_rate": 0.0001499444090976279,
"loss": 0.0016,
"step": 299117
},
{
"epoch": 26.0,
"eval_loss": 0.04517431557178497,
"eval_max_distance": 15,
"eval_mean_distance": 0,
"eval_runtime": 22.7192,
"eval_samples_per_second": 44.016,
"eval_steps_per_second": 2.201,
"step": 299676
},
{
"epoch": 26.1,
"learning_rate": 0.00014438853220737654,
"loss": 0.0054,
"step": 300846
},
{
"epoch": 26.25,
"learning_rate": 0.00013883265531712522,
"loss": 0.0026,
"step": 302575
},
{
"epoch": 26.4,
"learning_rate": 0.00013327677842687387,
"loss": 0.0018,
"step": 304304
},
{
"epoch": 26.55,
"learning_rate": 0.0001277209015366225,
"loss": 0.0016,
"step": 306033
},
{
"epoch": 26.7,
"learning_rate": 0.00012216502464637116,
"loss": 0.0015,
"step": 307762
},
{
"epoch": 26.85,
"learning_rate": 0.00011660914775611982,
"loss": 0.0013,
"step": 309491
},
{
"epoch": 27.0,
"eval_loss": 0.04144546017050743,
"eval_max_distance": 14,
"eval_mean_distance": 0,
"eval_runtime": 22.686,
"eval_samples_per_second": 44.08,
"eval_steps_per_second": 2.204,
"step": 311202
},
{
"epoch": 27.0,
"learning_rate": 0.00011105327086586847,
"loss": 0.0021,
"step": 311220
},
{
"epoch": 27.15,
"learning_rate": 0.00010549739397561713,
"loss": 0.0046,
"step": 312949
},
{
"epoch": 27.3,
"learning_rate": 9.994151708536577e-05,
"loss": 0.002,
"step": 314678
},
{
"epoch": 27.45,
"learning_rate": 9.438564019511443e-05,
"loss": 0.0016,
"step": 316407
},
{
"epoch": 27.6,
"learning_rate": 8.882976330486308e-05,
"loss": 0.0013,
"step": 318136
},
{
"epoch": 27.75,
"learning_rate": 8.327388641461173e-05,
"loss": 0.0013,
"step": 319865
},
{
"epoch": 27.9,
"learning_rate": 7.771800952436038e-05,
"loss": 0.0012,
"step": 321594
},
{
"epoch": 28.0,
"eval_loss": 0.04387975484132767,
"eval_max_distance": 14,
"eval_mean_distance": 0,
"eval_runtime": 21.586,
"eval_samples_per_second": 46.326,
"eval_steps_per_second": 2.316,
"step": 322728
},
{
"epoch": 28.05,
"learning_rate": 7.216213263410904e-05,
"loss": 0.0037,
"step": 323323
},
{
"epoch": 28.2,
"learning_rate": 6.660625574385768e-05,
"loss": 0.0027,
"step": 325052
},
{
"epoch": 28.35,
"learning_rate": 6.105037885360634e-05,
"loss": 0.0016,
"step": 326781
},
{
"epoch": 28.5,
"learning_rate": 5.5494501963355e-05,
"loss": 0.0013,
"step": 328510
},
{
"epoch": 28.65,
"learning_rate": 4.9938625073103644e-05,
"loss": 0.0011,
"step": 330239
},
{
"epoch": 28.8,
"learning_rate": 4.43827481828523e-05,
"loss": 0.001,
"step": 331968
},
{
"epoch": 28.95,
"learning_rate": 3.882687129260095e-05,
"loss": 0.001,
"step": 333697
},
{
"epoch": 29.0,
"eval_loss": 0.044395141303539276,
"eval_max_distance": 15,
"eval_mean_distance": 0,
"eval_runtime": 22.6289,
"eval_samples_per_second": 44.191,
"eval_steps_per_second": 2.21,
"step": 334254
},
{
"epoch": 29.1,
"learning_rate": 3.32709944023496e-05,
"loss": 0.0039,
"step": 335426
},
{
"epoch": 29.25,
"learning_rate": 2.771511751209825e-05,
"loss": 0.0017,
"step": 337155
},
{
"epoch": 29.4,
"learning_rate": 2.2159240621846906e-05,
"loss": 0.0012,
"step": 338884
},
{
"epoch": 29.55,
"learning_rate": 1.6603363731595556e-05,
"loss": 0.0011,
"step": 340613
},
{
"epoch": 29.7,
"learning_rate": 1.1047486841344207e-05,
"loss": 0.001,
"step": 342342
},
{
"epoch": 29.85,
"learning_rate": 0.0004477394104151002,
"loss": 0.0026,
"step": 344071
},
{
"epoch": 30.0,
"eval_loss": 0.042660146951675415,
"eval_max_distance": 19,
"eval_mean_distance": 0,
"eval_runtime": 48.9711,
"eval_samples_per_second": 20.42,
"eval_steps_per_second": 1.021,
"step": 345780
},
{
"epoch": 30.0,
"learning_rate": 0.00044440588428094934,
"loss": 0.0045,
"step": 345800
},
{
"epoch": 30.15,
"learning_rate": 0.00044107235814679855,
"loss": 0.0068,
"step": 347529
},
{
"epoch": 30.3,
"learning_rate": 0.00043773883201264776,
"loss": 0.0036,
"step": 349258
},
{
"epoch": 30.45,
"learning_rate": 0.0004344053058784969,
"loss": 0.0032,
"step": 350987
},
{
"epoch": 30.6,
"learning_rate": 0.0004310717797443461,
"loss": 0.0033,
"step": 352716
},
{
"epoch": 30.75,
"learning_rate": 0.0004277382536101953,
"loss": 0.0034,
"step": 354445
},
{
"epoch": 30.9,
"learning_rate": 0.00042440472747604454,
"loss": 0.004,
"step": 356174
},
{
"epoch": 23.29,
"learning_rate": 0.000593547500777279,
"loss": 0.0057,
"step": 357903
},
{
"epoch": 23.4,
"learning_rate": 0.0005910471934809801,
"loss": 0.0053,
"step": 359632
},
{
"epoch": 23.52,
"learning_rate": 0.0005885468861846814,
"loss": 0.0055,
"step": 361361
},
{
"epoch": 23.63,
"learning_rate": 0.0005860465788883828,
"loss": 0.0056,
"step": 363090
},
{
"epoch": 23.74,
"learning_rate": 0.0005835462715920841,
"loss": 0.006,
"step": 364819
},
{
"epoch": 23.85,
"learning_rate": 0.0005810459642957853,
"loss": 0.0064,
"step": 366548
},
{
"epoch": 23.97,
"learning_rate": 0.0005785456569994867,
"loss": 0.0077,
"step": 368277
},
{
"epoch": 24.0,
"eval_loss": 0.049542564898729324,
"eval_max_distance": 27,
"eval_mean_distance": 0,
"eval_runtime": 72.4399,
"eval_samples_per_second": 13.805,
"eval_steps_per_second": 0.925,
"step": 368808
},
{
"epoch": 24.08,
"learning_rate": 0.0005760453497031879,
"loss": 0.0158,
"step": 370006
},
{
"epoch": 24.19,
"learning_rate": 0.0005735450424068892,
"loss": 0.0082,
"step": 371735
},
{
"epoch": 24.3,
"learning_rate": 0.0005710447351105905,
"loss": 0.0057,
"step": 373464
},
{
"epoch": 24.42,
"learning_rate": 0.0005685444278142918,
"loss": 0.0059,
"step": 375193
},
{
"epoch": 24.53,
"learning_rate": 0.0005660441205179932,
"loss": 0.0055,
"step": 376922
},
{
"epoch": 24.64,
"learning_rate": 0.0005635438132216944,
"loss": 0.0062,
"step": 378651
},
{
"epoch": 24.75,
"learning_rate": 0.0005610435059253957,
"loss": 0.0059,
"step": 380380
},
{
"epoch": 24.87,
"learning_rate": 0.000558543198629097,
"loss": 0.0062,
"step": 382109
},
{
"epoch": 24.98,
"learning_rate": 0.0005560428913327982,
"loss": 0.0083,
"step": 383838
},
{
"epoch": 25.0,
"eval_loss": 0.044624801725149155,
"eval_max_distance": 37,
"eval_mean_distance": 0,
"eval_runtime": 35.859,
"eval_samples_per_second": 27.887,
"eval_steps_per_second": 1.868,
"step": 384175
},
{
"epoch": 25.09,
"learning_rate": 0.0005535425840364996,
"loss": 0.0154,
"step": 385567
},
{
"epoch": 25.2,
"learning_rate": 0.0005510422767402009,
"loss": 0.0075,
"step": 387296
},
{
"epoch": 25.32,
"learning_rate": 0.0005485419694439023,
"loss": 0.006,
"step": 389025
},
{
"epoch": 25.43,
"learning_rate": 0.0005460416621476034,
"loss": 0.0057,
"step": 390754
},
{
"epoch": 25.54,
"learning_rate": 0.0005435413548513047,
"loss": 0.0057,
"step": 392483
},
{
"epoch": 25.65,
"learning_rate": 0.0005410410475550061,
"loss": 0.0056,
"step": 394212
},
{
"epoch": 25.77,
"learning_rate": 0.0005385407402587073,
"loss": 0.0059,
"step": 395941
},
{
"epoch": 25.88,
"learning_rate": 0.0005360404329624087,
"loss": 0.0067,
"step": 397670
},
{
"epoch": 25.99,
"learning_rate": 0.0005335401256661099,
"loss": 0.0078,
"step": 399399
},
{
"epoch": 26.0,
"eval_loss": 0.048115409910678864,
"eval_max_distance": 47,
"eval_mean_distance": 0,
"eval_runtime": 35.8872,
"eval_samples_per_second": 27.865,
"eval_steps_per_second": 1.867,
"step": 399542
},
{
"epoch": 26.1,
"learning_rate": 0.0005310398183698112,
"loss": 0.0151,
"step": 401128
},
{
"epoch": 26.22,
"learning_rate": 0.0005285395110735125,
"loss": 0.0068,
"step": 402857
},
{
"epoch": 26.33,
"learning_rate": 0.0005260392037772138,
"loss": 0.0055,
"step": 404586
},
{
"epoch": 26.44,
"learning_rate": 0.0005235388964809152,
"loss": 0.0052,
"step": 406315
},
{
"epoch": 26.55,
"learning_rate": 0.0005210385891846165,
"loss": 0.0054,
"step": 408044
},
{
"epoch": 26.67,
"learning_rate": 0.0005185382818883176,
"loss": 0.0053,
"step": 409773
},
{
"epoch": 26.78,
"learning_rate": 0.000516037974592019,
"loss": 0.0052,
"step": 411502
},
{
"epoch": 26.89,
"learning_rate": 0.0005135376672957203,
"loss": 0.006,
"step": 413231
},
{
"epoch": 27.0,
"eval_loss": 0.042408570647239685,
"eval_max_distance": 37,
"eval_mean_distance": 0,
"eval_runtime": 14.3824,
"eval_samples_per_second": 69.529,
"eval_steps_per_second": 4.658,
"step": 414909
},
{
"epoch": 27.0,
"learning_rate": 0.0005110373599994215,
"loss": 0.0095,
"step": 414960
},
{
"epoch": 27.12,
"learning_rate": 0.0005085370527031229,
"loss": 0.0115,
"step": 416689
},
{
"epoch": 27.23,
"learning_rate": 0.0005060367454068242,
"loss": 0.006,
"step": 418418
},
{
"epoch": 27.34,
"learning_rate": 0.0005035364381105254,
"loss": 0.0051,
"step": 420147
},
{
"epoch": 27.45,
"learning_rate": 0.0005010361308142267,
"loss": 0.0047,
"step": 421876
},
{
"epoch": 27.57,
"learning_rate": 0.000498535823517928,
"loss": 0.0047,
"step": 423605
},
{
"epoch": 27.68,
"learning_rate": 0.0004960355162216294,
"loss": 0.0054,
"step": 425334
},
{
"epoch": 27.79,
"learning_rate": 0.0004935352089253306,
"loss": 0.0052,
"step": 427063
},
{
"epoch": 27.9,
"learning_rate": 0.000491034901629032,
"loss": 0.0056,
"step": 428792
},
{
"epoch": 28.0,
"eval_loss": 0.04393070191144943,
"eval_max_distance": 22,
"eval_mean_distance": 0,
"eval_runtime": 26.5483,
"eval_samples_per_second": 37.667,
"eval_steps_per_second": 2.524,
"step": 430276
},
{
"epoch": 28.02,
"learning_rate": 0.0004885345943327332,
"loss": 0.0105,
"step": 430521
},
{
"epoch": 28.13,
"learning_rate": 0.0004860342870364345,
"loss": 0.0093,
"step": 432250
},
{
"epoch": 28.24,
"learning_rate": 0.0004835339797401358,
"loss": 0.0055,
"step": 433979
},
{
"epoch": 28.35,
"learning_rate": 0.0004810336724438371,
"loss": 0.0049,
"step": 435708
},
{
"epoch": 28.47,
"learning_rate": 0.00047853336514753835,
"loss": 0.0044,
"step": 437437
},
{
"epoch": 28.58,
"learning_rate": 0.0004760330578512397,
"loss": 0.0044,
"step": 439166
},
{
"epoch": 28.69,
"learning_rate": 0.000473532750554941,
"loss": 0.0047,
"step": 440895
},
{
"epoch": 28.8,
"learning_rate": 0.0004710324432586423,
"loss": 0.0047,
"step": 442624
},
{
"epoch": 28.92,
"learning_rate": 0.00046853213596234356,
"loss": 0.0054,
"step": 444353
},
{
"epoch": 29.0,
"eval_loss": 0.04814203828573227,
"eval_max_distance": 23,
"eval_mean_distance": 0,
"eval_runtime": 35.122,
"eval_samples_per_second": 28.472,
"eval_steps_per_second": 1.908,
"step": 445643
},
{
"epoch": 29.03,
"learning_rate": 0.00046603182866604485,
"loss": 0.0103,
"step": 446082
},
{
"epoch": 29.14,
"learning_rate": 0.00046353152136974614,
"loss": 0.0076,
"step": 447811
},
{
"epoch": 29.25,
"learning_rate": 0.00046103121407344743,
"loss": 0.005,
"step": 449540
},
{
"epoch": 29.37,
"learning_rate": 0.00045853090677714877,
"loss": 0.0043,
"step": 451269
},
{
"epoch": 29.48,
"learning_rate": 0.00045603059948085,
"loss": 0.0043,
"step": 452998
},
{
"epoch": 29.59,
"learning_rate": 0.00045353029218455135,
"loss": 0.004,
"step": 454727
},
{
"epoch": 29.7,
"learning_rate": 0.0004510299848882526,
"loss": 0.0042,
"step": 456456
},
{
"epoch": 29.82,
"learning_rate": 6.808721912515757e-06,
"loss": 0.0039,
"step": 458185
},
{
"epoch": 29.93,
"learning_rate": 2.641543085351246e-06,
"loss": 0.004,
"step": 459914
},
{
"epoch": 30.0,
"eval_loss": 0.04417673125863075,
"eval_max_distance": 25,
"eval_mean_distance": 0,
"eval_runtime": 68.6071,
"eval_samples_per_second": 14.576,
"eval_steps_per_second": 0.977,
"step": 461010
},
{
"epoch": 30.0,
"step": 461010,
"total_flos": 1.1619438964958822e+17,
"train_loss": 3.931375028864634e-05,
"train_runtime": 411.3459,
"train_samples_per_second": 16810.814,
"train_steps_per_second": 1120.736
}
],
"logging_steps": 1729,
"max_steps": 461010,
"num_train_epochs": 30,
"save_steps": 3458,
"total_flos": 1.1619438964958822e+17,
"trial_name": null,
"trial_params": null
}