Romain-XV committed on
Commit 4644a6a · verified · 1 Parent(s): 96c0881

Training in progress, step 600, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1d35af2216bb44e6085bcbb7deb56cc0f230fb27bb1bb0c0c8c2b52dad52893c
+ oid sha256:ec52b6dac11e4d703e461b57dbd96f63fdad826c54925c92c4a9ed3940c7f925
  size 50358592
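adapter_model.safetensors is the weight file PEFT writes for a LoRA-style adapter, so this checkpoint would normally be attached on top of its base model. A heavily hedged sketch, assuming a causal-LM task; the base model is not named anywhere in this commit, so the identifier below is only a placeholder:

from transformers import AutoModelForCausalLM
from peft import PeftModel

# Placeholder identifier -- the base model is NOT specified in this commit.
base = AutoModelForCausalLM.from_pretrained("BASE_MODEL_ID")

# Attach the adapter weights stored in the checkpoint directory
# (assumes adapter_config.json sits alongside adapter_model.safetensors).
model = PeftModel.from_pretrained(base, "last-checkpoint")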
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9f6b727df5e3cf52d32a08cc3af718bb52f16b143a0fe10b3781327db01b8c6c
+ oid sha256:8f992e9bf2526250ac48863a69bc14c754ff3750575d47e4dfdac1bf35fa0d94
  size 25785082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:032509b8d229993b63bf6eb5ce58592a4165e6eaa97f7e78cf084fd8dfd42e62
+ oid sha256:82f393434236f666cbb4f691051257e303661800a0f9787f5942457107e1ccaa
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:de9d2b2a7ed3e3373e7769cf999e7079547a9a97d1513882ecf425b351ddca4b
+ oid sha256:beb02bcc76a1d18125c30bb2c994848252fc0f0f039db286b9a31c46c82cab52
  size 1064
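Each of the four files above is stored through Git LFS, so the diff only touches the pointer: a sha256 oid plus a byte size. A minimal sketch (the local path is illustrative, not part of this commit) for confirming that a downloaded file matches the pointer recorded here:

import hashlib
import os

def matches_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Recompute sha256 and size of a local file and compare to an LFS pointer."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

# Values taken from the adapter_model.safetensors pointer in this commit.
print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "ec52b6dac11e4d703e461b57dbd96f63fdad826c54925c92c4a9ed3940c7f925",
    50358592,
))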
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 2.0830085277557373,
  "best_model_checkpoint": "miner_id_24/checkpoint-400",
- "epoch": 0.05976393246675631,
+ "epoch": 0.07171671896010758,
  "eval_steps": 100,
- "global_step": 500,
+ "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3555,6 +3555,714 @@
  "eval_samples_per_second": 40.76,
  "eval_steps_per_second": 10.19,
  "step": 500
3558
+ },
3559
+ {
3560
+ "epoch": 0.05988346033168983,
3561
+ "grad_norm": 6.650758743286133,
3562
+ "learning_rate": 0.0002484064591485923,
3563
+ "loss": 16.1606,
3564
+ "step": 501
3565
+ },
3566
+ {
3567
+ "epoch": 0.060002988196623336,
3568
+ "grad_norm": 6.113708972930908,
3569
+ "learning_rate": 0.0002483999754345083,
3570
+ "loss": 17.3134,
3571
+ "step": 502
3572
+ },
3573
+ {
3574
+ "epoch": 0.06012251606155685,
3575
+ "grad_norm": 6.330297946929932,
3576
+ "learning_rate": 0.0002483934786418565,
3577
+ "loss": 16.6002,
3578
+ "step": 503
3579
+ },
3580
+ {
3581
+ "epoch": 0.060242043926490366,
3582
+ "grad_norm": 6.722366809844971,
3583
+ "learning_rate": 0.0002483869687713254,
3584
+ "loss": 16.2453,
3585
+ "step": 504
3586
+ },
3587
+ {
3588
+ "epoch": 0.060361571791423874,
3589
+ "grad_norm": 6.331783771514893,
3590
+ "learning_rate": 0.000248380445823605,
3591
+ "loss": 17.3577,
3592
+ "step": 505
3593
+ },
3594
+ {
3595
+ "epoch": 0.06048109965635739,
3596
+ "grad_norm": 5.92828369140625,
3597
+ "learning_rate": 0.00024837390979938674,
3598
+ "loss": 16.2485,
3599
+ "step": 506
3600
+ },
3601
+ {
3602
+ "epoch": 0.0606006275212909,
3603
+ "grad_norm": 13.758573532104492,
3604
+ "learning_rate": 0.0002483673606993632,
3605
+ "loss": 16.9102,
3606
+ "step": 507
3607
+ },
3608
+ {
3609
+ "epoch": 0.06072015538622441,
3610
+ "grad_norm": 5.667209625244141,
3611
+ "learning_rate": 0.00024836079852422855,
3612
+ "loss": 15.9809,
3613
+ "step": 508
3614
+ },
3615
+ {
3616
+ "epoch": 0.060839683251157926,
3617
+ "grad_norm": 6.230560779571533,
3618
+ "learning_rate": 0.00024835422327467826,
3619
+ "loss": 16.8764,
3620
+ "step": 509
3621
+ },
3622
+ {
3623
+ "epoch": 0.06095921111609144,
3624
+ "grad_norm": 6.252189636230469,
3625
+ "learning_rate": 0.00024834763495140927,
3626
+ "loss": 16.3699,
3627
+ "step": 510
3628
+ },
3629
+ {
3630
+ "epoch": 0.06107873898102495,
3631
+ "grad_norm": 6.850605010986328,
3632
+ "learning_rate": 0.00024834103355511974,
3633
+ "loss": 18.5964,
3634
+ "step": 511
3635
+ },
3636
+ {
3637
+ "epoch": 0.061198266845958464,
3638
+ "grad_norm": 6.297305107116699,
3639
+ "learning_rate": 0.0002483344190865094,
3640
+ "loss": 16.974,
3641
+ "step": 512
3642
+ },
3643
+ {
3644
+ "epoch": 0.06131779471089198,
3645
+ "grad_norm": 6.262433052062988,
3646
+ "learning_rate": 0.00024832779154627927,
3647
+ "loss": 17.2464,
3648
+ "step": 513
3649
+ },
3650
+ {
3651
+ "epoch": 0.06143732257582549,
3652
+ "grad_norm": 6.210182189941406,
3653
+ "learning_rate": 0.00024832115093513177,
3654
+ "loss": 17.2708,
3655
+ "step": 514
3656
+ },
3657
+ {
3658
+ "epoch": 0.061556850440759,
3659
+ "grad_norm": 6.332021236419678,
3660
+ "learning_rate": 0.0002483144972537706,
3661
+ "loss": 17.5522,
3662
+ "step": 515
3663
+ },
3664
+ {
3665
+ "epoch": 0.06167637830569252,
3666
+ "grad_norm": 6.204654216766357,
3667
+ "learning_rate": 0.00024830783050290117,
3668
+ "loss": 17.1688,
3669
+ "step": 516
3670
+ },
3671
+ {
3672
+ "epoch": 0.061795906170626025,
3673
+ "grad_norm": 6.059911251068115,
3674
+ "learning_rate": 0.00024830115068322987,
3675
+ "loss": 16.7284,
3676
+ "step": 517
3677
+ },
3678
+ {
3679
+ "epoch": 0.06191543403555954,
3680
+ "grad_norm": 6.756778240203857,
3681
+ "learning_rate": 0.00024829445779546476,
3682
+ "loss": 17.4355,
3683
+ "step": 518
3684
+ },
3685
+ {
3686
+ "epoch": 0.062034961900493055,
3687
+ "grad_norm": 6.644315719604492,
3688
+ "learning_rate": 0.0002482877518403152,
3689
+ "loss": 17.4694,
3690
+ "step": 519
3691
+ },
3692
+ {
3693
+ "epoch": 0.06215448976542656,
3694
+ "grad_norm": 6.000011920928955,
3695
+ "learning_rate": 0.00024828103281849184,
3696
+ "loss": 16.1155,
3697
+ "step": 520
3698
+ },
3699
+ {
3700
+ "epoch": 0.06227401763036008,
3701
+ "grad_norm": 6.423489570617676,
3702
+ "learning_rate": 0.0002482743007307068,
3703
+ "loss": 17.8717,
3704
+ "step": 521
3705
+ },
3706
+ {
3707
+ "epoch": 0.06239354549529359,
3708
+ "grad_norm": 6.263865947723389,
3709
+ "learning_rate": 0.00024826755557767364,
3710
+ "loss": 16.6036,
3711
+ "step": 522
3712
+ },
3713
+ {
3714
+ "epoch": 0.0625130733602271,
3715
+ "grad_norm": 6.165550231933594,
3716
+ "learning_rate": 0.0002482607973601072,
3717
+ "loss": 16.4604,
3718
+ "step": 523
3719
+ },
3720
+ {
3721
+ "epoch": 0.06263260122516062,
3722
+ "grad_norm": 6.165558338165283,
3723
+ "learning_rate": 0.0002482540260787238,
3724
+ "loss": 17.6127,
3725
+ "step": 524
3726
+ },
3727
+ {
3728
+ "epoch": 0.06275212909009413,
3729
+ "grad_norm": 6.105308532714844,
3730
+ "learning_rate": 0.0002482472417342411,
3731
+ "loss": 16.3942,
3732
+ "step": 525
3733
+ },
3734
+ {
3735
+ "epoch": 0.06287165695502764,
3736
+ "grad_norm": 6.943295955657959,
3737
+ "learning_rate": 0.00024824044432737805,
3738
+ "loss": 16.9056,
3739
+ "step": 526
3740
+ },
3741
+ {
3742
+ "epoch": 0.06299118481996116,
3743
+ "grad_norm": 6.2281365394592285,
3744
+ "learning_rate": 0.00024823363385885515,
3745
+ "loss": 16.3102,
3746
+ "step": 527
3747
+ },
3748
+ {
3749
+ "epoch": 0.06311071268489467,
3750
+ "grad_norm": 6.561606407165527,
3751
+ "learning_rate": 0.0002482268103293942,
3752
+ "loss": 17.055,
3753
+ "step": 528
3754
+ },
3755
+ {
3756
+ "epoch": 0.06323024054982818,
3757
+ "grad_norm": 6.460758209228516,
3758
+ "learning_rate": 0.0002482199737397184,
3759
+ "loss": 15.1712,
3760
+ "step": 529
3761
+ },
3762
+ {
3763
+ "epoch": 0.0633497684147617,
3764
+ "grad_norm": 6.111406326293945,
3765
+ "learning_rate": 0.0002482131240905523,
3766
+ "loss": 15.0887,
3767
+ "step": 530
3768
+ },
3769
+ {
3770
+ "epoch": 0.0634692962796952,
3771
+ "grad_norm": 6.305261135101318,
3772
+ "learning_rate": 0.0002482062613826219,
3773
+ "loss": 16.2783,
3774
+ "step": 531
3775
+ },
3776
+ {
3777
+ "epoch": 0.06358882414462871,
3778
+ "grad_norm": 6.447376728057861,
3779
+ "learning_rate": 0.00024819938561665444,
3780
+ "loss": 15.3546,
3781
+ "step": 532
3782
+ },
3783
+ {
3784
+ "epoch": 0.06370835200956224,
3785
+ "grad_norm": 6.626522064208984,
3786
+ "learning_rate": 0.0002481924967933788,
3787
+ "loss": 16.1421,
3788
+ "step": 533
3789
+ },
3790
+ {
3791
+ "epoch": 0.06382787987449574,
3792
+ "grad_norm": 6.10511589050293,
3793
+ "learning_rate": 0.00024818559491352496,
3794
+ "loss": 17.1937,
3795
+ "step": 534
3796
+ },
3797
+ {
3798
+ "epoch": 0.06394740773942925,
3799
+ "grad_norm": 6.697993278503418,
3800
+ "learning_rate": 0.00024817867997782453,
3801
+ "loss": 17.1077,
3802
+ "step": 535
3803
+ },
3804
+ {
3805
+ "epoch": 0.06406693560436277,
3806
+ "grad_norm": 6.638538837432861,
3807
+ "learning_rate": 0.0002481717519870103,
3808
+ "loss": 16.2386,
3809
+ "step": 536
3810
+ },
3811
+ {
3812
+ "epoch": 0.06418646346929628,
3813
+ "grad_norm": 8.689882278442383,
3814
+ "learning_rate": 0.00024816481094181656,
3815
+ "loss": 15.8653,
3816
+ "step": 537
3817
+ },
3818
+ {
3819
+ "epoch": 0.06430599133422979,
3820
+ "grad_norm": 5.992562294006348,
3821
+ "learning_rate": 0.00024815785684297905,
3822
+ "loss": 16.4454,
3823
+ "step": 538
3824
+ },
3825
+ {
3826
+ "epoch": 0.06442551919916331,
3827
+ "grad_norm": 6.0935139656066895,
3828
+ "learning_rate": 0.0002481508896912346,
3829
+ "loss": 15.9363,
3830
+ "step": 539
3831
+ },
3832
+ {
3833
+ "epoch": 0.06454504706409682,
3834
+ "grad_norm": 6.106266021728516,
3835
+ "learning_rate": 0.00024814390948732187,
3836
+ "loss": 16.1408,
3837
+ "step": 540
3838
+ },
3839
+ {
3840
+ "epoch": 0.06466457492903033,
3841
+ "grad_norm": 5.9442830085754395,
3842
+ "learning_rate": 0.00024813691623198046,
3843
+ "loss": 16.451,
3844
+ "step": 541
3845
+ },
3846
+ {
3847
+ "epoch": 0.06478410279396385,
3848
+ "grad_norm": 6.229999542236328,
3849
+ "learning_rate": 0.0002481299099259517,
3850
+ "loss": 15.3861,
3851
+ "step": 542
3852
+ },
3853
+ {
3854
+ "epoch": 0.06490363065889736,
3855
+ "grad_norm": 6.283480167388916,
3856
+ "learning_rate": 0.000248122890569978,
3857
+ "loss": 16.3622,
3858
+ "step": 543
3859
+ },
3860
+ {
3861
+ "epoch": 0.06502315852383087,
3862
+ "grad_norm": 5.996384620666504,
3863
+ "learning_rate": 0.0002481158581648034,
3864
+ "loss": 15.9675,
3865
+ "step": 544
3866
+ },
3867
+ {
3868
+ "epoch": 0.06514268638876439,
3869
+ "grad_norm": 5.98213529586792,
3870
+ "learning_rate": 0.00024810881271117324,
3871
+ "loss": 15.637,
3872
+ "step": 545
3873
+ },
3874
+ {
3875
+ "epoch": 0.0652622142536979,
3876
+ "grad_norm": 6.469746112823486,
3877
+ "learning_rate": 0.0002481017542098342,
3878
+ "loss": 16.3619,
3879
+ "step": 546
3880
+ },
3881
+ {
3882
+ "epoch": 0.0653817421186314,
3883
+ "grad_norm": 6.57551383972168,
3884
+ "learning_rate": 0.0002480946826615344,
3885
+ "loss": 17.0118,
3886
+ "step": 547
3887
+ },
3888
+ {
3889
+ "epoch": 0.06550126998356492,
3890
+ "grad_norm": 6.5282182693481445,
3891
+ "learning_rate": 0.0002480875980670233,
3892
+ "loss": 18.4883,
3893
+ "step": 548
3894
+ },
3895
+ {
3896
+ "epoch": 0.06562079784849843,
3897
+ "grad_norm": 6.027569770812988,
3898
+ "learning_rate": 0.0002480805004270518,
3899
+ "loss": 15.2508,
3900
+ "step": 549
3901
+ },
3902
+ {
3903
+ "epoch": 0.06574032571343194,
3904
+ "grad_norm": 6.519893646240234,
3905
+ "learning_rate": 0.0002480733897423721,
3906
+ "loss": 17.2495,
3907
+ "step": 550
3908
+ },
3909
+ {
3910
+ "epoch": 0.06585985357836546,
3911
+ "grad_norm": 5.971823215484619,
3912
+ "learning_rate": 0.0002480662660137378,
3913
+ "loss": 15.5504,
3914
+ "step": 551
3915
+ },
3916
+ {
3917
+ "epoch": 0.06597938144329897,
3918
+ "grad_norm": 6.29182243347168,
3919
+ "learning_rate": 0.000248059129241904,
3920
+ "loss": 15.4693,
3921
+ "step": 552
3922
+ },
3923
+ {
3924
+ "epoch": 0.06609890930823248,
3925
+ "grad_norm": 6.490065574645996,
3926
+ "learning_rate": 0.000248051979427627,
3927
+ "loss": 15.931,
3928
+ "step": 553
3929
+ },
3930
+ {
3931
+ "epoch": 0.066218437173166,
3932
+ "grad_norm": 6.518825531005859,
3933
+ "learning_rate": 0.00024804481657166467,
3934
+ "loss": 16.3236,
3935
+ "step": 554
3936
+ },
3937
+ {
3938
+ "epoch": 0.06633796503809951,
3939
+ "grad_norm": 6.617817401885986,
3940
+ "learning_rate": 0.0002480376406747761,
3941
+ "loss": 15.8417,
3942
+ "step": 555
3943
+ },
3944
+ {
3945
+ "epoch": 0.06645749290303302,
3946
+ "grad_norm": 6.263620376586914,
3947
+ "learning_rate": 0.0002480304517377218,
3948
+ "loss": 16.4035,
3949
+ "step": 556
3950
+ },
3951
+ {
3952
+ "epoch": 0.06657702076796654,
3953
+ "grad_norm": 6.821305274963379,
3954
+ "learning_rate": 0.0002480232497612638,
3955
+ "loss": 16.9507,
3956
+ "step": 557
3957
+ },
3958
+ {
3959
+ "epoch": 0.06669654863290005,
3960
+ "grad_norm": 6.247030735015869,
3961
+ "learning_rate": 0.0002480160347461653,
3962
+ "loss": 16.5576,
3963
+ "step": 558
3964
+ },
3965
+ {
3966
+ "epoch": 0.06681607649783355,
3967
+ "grad_norm": 6.491495132446289,
3968
+ "learning_rate": 0.00024800880669319106,
3969
+ "loss": 18.7603,
3970
+ "step": 559
3971
+ },
3972
+ {
3973
+ "epoch": 0.06693560436276708,
3974
+ "grad_norm": 6.250596523284912,
3975
+ "learning_rate": 0.0002480015656031071,
3976
+ "loss": 15.7767,
3977
+ "step": 560
3978
+ },
3979
+ {
3980
+ "epoch": 0.06705513222770058,
3981
+ "grad_norm": 6.2819390296936035,
3982
+ "learning_rate": 0.0002479943114766808,
3983
+ "loss": 15.8,
3984
+ "step": 561
3985
+ },
3986
+ {
3987
+ "epoch": 0.06717466009263409,
3988
+ "grad_norm": 5.973819732666016,
3989
+ "learning_rate": 0.0002479870443146811,
3990
+ "loss": 15.6882,
3991
+ "step": 562
3992
+ },
3993
+ {
3994
+ "epoch": 0.06729418795756761,
3995
+ "grad_norm": 6.051755428314209,
3996
+ "learning_rate": 0.0002479797641178782,
3997
+ "loss": 16.4719,
3998
+ "step": 563
3999
+ },
4000
+ {
4001
+ "epoch": 0.06741371582250112,
4002
+ "grad_norm": 6.508502960205078,
4003
+ "learning_rate": 0.00024797247088704366,
4004
+ "loss": 16.9064,
4005
+ "step": 564
4006
+ },
4007
+ {
4008
+ "epoch": 0.06753324368743463,
4009
+ "grad_norm": 6.121264934539795,
4010
+ "learning_rate": 0.0002479651646229505,
4011
+ "loss": 16.2867,
4012
+ "step": 565
4013
+ },
4014
+ {
4015
+ "epoch": 0.06765277155236815,
4016
+ "grad_norm": 6.16292667388916,
4017
+ "learning_rate": 0.00024795784532637296,
4018
+ "loss": 15.8647,
4019
+ "step": 566
4020
+ },
4021
+ {
4022
+ "epoch": 0.06777229941730166,
4023
+ "grad_norm": 6.289575099945068,
4024
+ "learning_rate": 0.0002479505129980869,
4025
+ "loss": 16.5442,
4026
+ "step": 567
4027
+ },
4028
+ {
4029
+ "epoch": 0.06789182728223517,
4030
+ "grad_norm": 6.375883102416992,
4031
+ "learning_rate": 0.0002479431676388694,
4032
+ "loss": 16.143,
4033
+ "step": 568
4034
+ },
4035
+ {
4036
+ "epoch": 0.06801135514716869,
4037
+ "grad_norm": 6.734157562255859,
4038
+ "learning_rate": 0.00024793580924949897,
4039
+ "loss": 16.5816,
4040
+ "step": 569
4041
+ },
4042
+ {
4043
+ "epoch": 0.0681308830121022,
4044
+ "grad_norm": 5.916708469390869,
4045
+ "learning_rate": 0.00024792843783075544,
4046
+ "loss": 15.6337,
4047
+ "step": 570
4048
+ },
4049
+ {
4050
+ "epoch": 0.0682504108770357,
4051
+ "grad_norm": 5.987206935882568,
4052
+ "learning_rate": 0.00024792105338342015,
4053
+ "loss": 16.1654,
4054
+ "step": 571
4055
+ },
4056
+ {
4057
+ "epoch": 0.06836993874196923,
4058
+ "grad_norm": 7.18080997467041,
4059
+ "learning_rate": 0.00024791365590827566,
4060
+ "loss": 17.0761,
4061
+ "step": 572
4062
+ },
4063
+ {
4064
+ "epoch": 0.06848946660690274,
4065
+ "grad_norm": 5.998250961303711,
4066
+ "learning_rate": 0.000247906245406106,
4067
+ "loss": 15.8114,
4068
+ "step": 573
4069
+ },
4070
+ {
4071
+ "epoch": 0.06860899447183624,
4072
+ "grad_norm": 5.987746715545654,
4073
+ "learning_rate": 0.0002478988218776967,
4074
+ "loss": 16.9754,
4075
+ "step": 574
4076
+ },
4077
+ {
4078
+ "epoch": 0.06872852233676977,
4079
+ "grad_norm": 6.771132469177246,
4080
+ "learning_rate": 0.0002478913853238344,
4081
+ "loss": 16.317,
4082
+ "step": 575
4083
+ },
4084
+ {
4085
+ "epoch": 0.06884805020170327,
4086
+ "grad_norm": 6.411704063415527,
4087
+ "learning_rate": 0.00024788393574530726,
4088
+ "loss": 17.1471,
4089
+ "step": 576
4090
+ },
4091
+ {
4092
+ "epoch": 0.06896757806663678,
4093
+ "grad_norm": 6.628751754760742,
4094
+ "learning_rate": 0.0002478764731429049,
4095
+ "loss": 16.4329,
4096
+ "step": 577
4097
+ },
4098
+ {
4099
+ "epoch": 0.0690871059315703,
4100
+ "grad_norm": 7.8661651611328125,
4101
+ "learning_rate": 0.00024786899751741827,
4102
+ "loss": 15.7631,
4103
+ "step": 578
4104
+ },
4105
+ {
4106
+ "epoch": 0.06920663379650381,
4107
+ "grad_norm": 6.639915943145752,
4108
+ "learning_rate": 0.0002478615088696396,
4109
+ "loss": 16.3586,
4110
+ "step": 579
4111
+ },
4112
+ {
4113
+ "epoch": 0.06932616166143732,
4114
+ "grad_norm": 6.35560941696167,
4115
+ "learning_rate": 0.0002478540072003626,
4116
+ "loss": 17.8284,
4117
+ "step": 580
4118
+ },
4119
+ {
4120
+ "epoch": 0.06944568952637084,
4121
+ "grad_norm": 6.45347261428833,
4122
+ "learning_rate": 0.00024784649251038233,
4123
+ "loss": 16.4598,
4124
+ "step": 581
4125
+ },
4126
+ {
4127
+ "epoch": 0.06956521739130435,
4128
+ "grad_norm": 6.136906147003174,
4129
+ "learning_rate": 0.00024783896480049525,
4130
+ "loss": 16.1433,
4131
+ "step": 582
4132
+ },
4133
+ {
4134
+ "epoch": 0.06968474525623786,
4135
+ "grad_norm": 6.355082035064697,
4136
+ "learning_rate": 0.00024783142407149917,
4137
+ "loss": 17.7579,
4138
+ "step": 583
4139
+ },
4140
+ {
4141
+ "epoch": 0.06980427312117138,
4142
+ "grad_norm": 6.28627872467041,
4143
+ "learning_rate": 0.00024782387032419334,
4144
+ "loss": 16.5747,
4145
+ "step": 584
4146
+ },
4147
+ {
4148
+ "epoch": 0.06992380098610489,
4149
+ "grad_norm": 6.3861188888549805,
4150
+ "learning_rate": 0.0002478163035593783,
4151
+ "loss": 16.4684,
4152
+ "step": 585
4153
+ },
4154
+ {
4155
+ "epoch": 0.0700433288510384,
4156
+ "grad_norm": 8.057326316833496,
4157
+ "learning_rate": 0.00024780872377785603,
4158
+ "loss": 16.7268,
4159
+ "step": 586
4160
+ },
4161
+ {
4162
+ "epoch": 0.07016285671597192,
4163
+ "grad_norm": 6.8107805252075195,
4164
+ "learning_rate": 0.0002478011309804298,
4165
+ "loss": 14.9781,
4166
+ "step": 587
4167
+ },
4168
+ {
4169
+ "epoch": 0.07028238458090542,
4170
+ "grad_norm": 7.4561076164245605,
4171
+ "learning_rate": 0.0002477935251679045,
4172
+ "loss": 15.9972,
4173
+ "step": 588
4174
+ },
4175
+ {
4176
+ "epoch": 0.07040191244583893,
4177
+ "grad_norm": 6.438401222229004,
4178
+ "learning_rate": 0.0002477859063410861,
4179
+ "loss": 16.3148,
4180
+ "step": 589
4181
+ },
4182
+ {
4183
+ "epoch": 0.07052144031077245,
4184
+ "grad_norm": 6.0730299949646,
4185
+ "learning_rate": 0.0002477782745007821,
4186
+ "loss": 15.5541,
4187
+ "step": 590
4188
+ },
4189
+ {
4190
+ "epoch": 0.07064096817570596,
4191
+ "grad_norm": 6.609666347503662,
4192
+ "learning_rate": 0.00024777062964780137,
4193
+ "loss": 16.2029,
4194
+ "step": 591
4195
+ },
4196
+ {
4197
+ "epoch": 0.07076049604063947,
4198
+ "grad_norm": 6.8198652267456055,
4199
+ "learning_rate": 0.00024776297178295424,
4200
+ "loss": 17.0991,
4201
+ "step": 592
4202
+ },
4203
+ {
4204
+ "epoch": 0.07088002390557299,
4205
+ "grad_norm": 7.101873874664307,
4206
+ "learning_rate": 0.0002477553009070522,
4207
+ "loss": 15.2356,
4208
+ "step": 593
4209
+ },
4210
+ {
4211
+ "epoch": 0.0709995517705065,
4212
+ "grad_norm": 6.614659786224365,
4213
+ "learning_rate": 0.0002477476170209083,
4214
+ "loss": 16.8781,
4215
+ "step": 594
4216
+ },
4217
+ {
4218
+ "epoch": 0.07111907963544001,
4219
+ "grad_norm": 9.546778678894043,
4220
+ "learning_rate": 0.0002477399201253369,
4221
+ "loss": 18.754,
4222
+ "step": 595
4223
+ },
4224
+ {
4225
+ "epoch": 0.07123860750037353,
4226
+ "grad_norm": 5.819904327392578,
4227
+ "learning_rate": 0.0002477322102211538,
4228
+ "loss": 15.7819,
4229
+ "step": 596
4230
+ },
4231
+ {
4232
+ "epoch": 0.07135813536530704,
4233
+ "grad_norm": 6.660562515258789,
4234
+ "learning_rate": 0.00024772448730917614,
4235
+ "loss": 17.3348,
4236
+ "step": 597
4237
+ },
4238
+ {
4239
+ "epoch": 0.07147766323024055,
4240
+ "grad_norm": 6.330270290374756,
4241
+ "learning_rate": 0.0002477167513902224,
4242
+ "loss": 16.4988,
4243
+ "step": 598
4244
+ },
4245
+ {
4246
+ "epoch": 0.07159719109517407,
4247
+ "grad_norm": 5.982934951782227,
4248
+ "learning_rate": 0.00024770900246511246,
4249
+ "loss": 14.5084,
4250
+ "step": 599
4251
+ },
4252
+ {
4253
+ "epoch": 0.07171671896010758,
4254
+ "grad_norm": 6.04694128036499,
4255
+ "learning_rate": 0.0002477012405346676,
4256
+ "loss": 16.4352,
4257
+ "step": 600
4258
+ },
4259
+ {
4260
+ "epoch": 0.07171671896010758,
4261
+ "eval_loss": 2.0841848850250244,
4262
+ "eval_runtime": 122.6125,
4263
+ "eval_samples_per_second": 40.779,
4264
+ "eval_steps_per_second": 10.195,
4265
+ "step": 600
  }
  ],
  "logging_steps": 1,
@@ -3569,7 +4277,7 @@
  "early_stopping_threshold": 0.0
  },
  "attributes": {
- "early_stopping_patience_counter": 1
+ "early_stopping_patience_counter": 2
  }
  },
  "TrainerControl": {
@@ -3578,12 +4286,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 3.0963313026269184e+16,
+ "total_flos": 3.716990563988275e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null