Training in progress, step 900, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34d5f51b1fc8efc396118d7dbc2cf0408eee2351a92c5a4ec4bc3b30c41a6989
 size 73911112

 version https://git-lfs.github.com/spec/v1
+oid sha256:f045355f938f666786ad36d5a7bc9028d0be96a54b23bd9a02def905f8ac3f5f
 size 73911112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ec4a47854db24d29136476a8daf26ea56e784d1056cf5209ee85bb047191d0a
 size 37965684

 version https://git-lfs.github.com/spec/v1
+oid sha256:9189f4679ea4da6f1b3935af0c0173ea9b0016a35be3117e47e4abc4073f0d74
 size 37965684

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db7c2ff2f7a033f1671f18f77925cc4fffd3d1db56cbdc8b5ffb68ee38be4ea1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7645d4bab96924146718c10368503cddfbaffa9b9752acf45c4c650d90ccb1e7
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e7be1d8205568b132245b3133e11fb1272b81edf049bdd1683d95706e20bf3e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f39bff186481b2691af54be9cda77b9297711925f7bc2907a12df9c78be5954c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.574374258518219,
   "best_model_checkpoint": "miner_id_24/checkpoint-700",
-  "epoch": 1.0942041374593947,
   "eval_steps": 100,
-  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5679,6 +5679,714 @@
       "eval_samples_per_second": 31.748,
       "eval_steps_per_second": 7.937,
       "step": 800
     }
   ],
   "logging_steps": 1,
@@ -5693,7 +6401,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -5702,12 +6410,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.2854806136265114e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.574374258518219,
   "best_model_checkpoint": "miner_id_24/checkpoint-700",
+  "epoch": 1.230979654641819,
   "eval_steps": 100,
+  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 31.748,
       "eval_steps_per_second": 7.937,
       "step": 800
+    },
+    {
+      "epoch": 1.095571892631219,
+      "grad_norm": 0.28456875681877136,
+      "learning_rate": 8.609934310386318e-05,
+      "loss": 0.6427,
+      "step": 801
+    },
+    {
+      "epoch": 1.0969396478030433,
+      "grad_norm": 0.29876023530960083,
+      "learning_rate": 8.588526066457907e-05,
+      "loss": 0.5213,
+      "step": 802
+    },
+    {
+      "epoch": 1.0983074029748674,
+      "grad_norm": 0.2823258936405182,
+      "learning_rate": 8.567124420968549e-05,
+      "loss": 0.6555,
+      "step": 803
+    },
+    {
+      "epoch": 1.0996751581466917,
+      "grad_norm": 0.28708407282829285,
+      "learning_rate": 8.545729473967884e-05,
+      "loss": 0.618,
+      "step": 804
+    },
+    {
+      "epoch": 1.101042913318516,
+      "grad_norm": 0.2833726108074188,
+      "learning_rate": 8.52434132547423e-05,
+      "loss": 0.6349,
+      "step": 805
+    },
+    {
+      "epoch": 1.1024106684903403,
+      "grad_norm": 0.33605238795280457,
+      "learning_rate": 8.502960075474131e-05,
+      "loss": 0.4921,
+      "step": 806
+    },
+    {
+      "epoch": 1.1037784236621644,
+      "grad_norm": 0.3451543152332306,
+      "learning_rate": 8.481585823921873e-05,
+      "loss": 0.5024,
+      "step": 807
+    },
+    {
+      "epoch": 1.1051461788339887,
+      "grad_norm": 0.23923547565937042,
+      "learning_rate": 8.460218670739031e-05,
+      "loss": 0.3788,
+      "step": 808
+    },
+    {
+      "epoch": 1.106513934005813,
+      "grad_norm": 0.2655201852321625,
+      "learning_rate": 8.438858715813996e-05,
+      "loss": 0.5876,
+      "step": 809
+    },
+    {
+      "epoch": 1.1078816891776373,
+      "grad_norm": 0.2713606357574463,
+      "learning_rate": 8.417506059001505e-05,
+      "loss": 0.397,
+      "step": 810
+    },
+    {
+      "epoch": 1.1092494443494614,
+      "grad_norm": 0.24983306229114532,
+      "learning_rate": 8.396160800122176e-05,
+      "loss": 0.3463,
+      "step": 811
+    },
+    {
+      "epoch": 1.1106171995212857,
+      "grad_norm": 0.28522688150405884,
+      "learning_rate": 8.374823038962056e-05,
+      "loss": 0.4376,
+      "step": 812
+    },
+    {
+      "epoch": 1.11198495469311,
+      "grad_norm": 0.2977089583873749,
+      "learning_rate": 8.353492875272118e-05,
+      "loss": 0.4729,
+      "step": 813
+    },
+    {
+      "epoch": 1.1133527098649343,
+      "grad_norm": 0.2716682553291321,
+      "learning_rate": 8.33217040876784e-05,
+      "loss": 0.5256,
+      "step": 814
+    },
+    {
+      "epoch": 1.1147204650367584,
+      "grad_norm": 0.34533944725990295,
+      "learning_rate": 8.310855739128703e-05,
+      "loss": 0.5092,
+      "step": 815
+    },
+    {
+      "epoch": 1.1160882202085827,
+      "grad_norm": 0.2501341998577118,
+      "learning_rate": 8.289548965997748e-05,
+      "loss": 0.4022,
+      "step": 816
+    },
+    {
+      "epoch": 1.117455975380407,
+      "grad_norm": 0.2617763578891754,
+      "learning_rate": 8.26825018898109e-05,
+      "loss": 0.3671,
+      "step": 817
+    },
+    {
+      "epoch": 1.118823730552231,
+      "grad_norm": 0.2673083543777466,
+      "learning_rate": 8.246959507647478e-05,
+      "loss": 0.4135,
+      "step": 818
+    },
+    {
+      "epoch": 1.1201914857240554,
+      "grad_norm": 0.31147336959838867,
+      "learning_rate": 8.225677021527798e-05,
+      "loss": 0.6971,
+      "step": 819
+    },
+    {
+      "epoch": 1.1215592408958797,
+      "grad_norm": 0.23961347341537476,
+      "learning_rate": 8.20440283011464e-05,
+      "loss": 0.3846,
+      "step": 820
+    },
+    {
+      "epoch": 1.122926996067704,
+      "grad_norm": 0.2794473469257355,
+      "learning_rate": 8.183137032861804e-05,
+      "loss": 0.3729,
+      "step": 821
+    },
+    {
+      "epoch": 1.124294751239528,
+      "grad_norm": 0.32503047585487366,
+      "learning_rate": 8.16187972918386e-05,
+      "loss": 0.6592,
+      "step": 822
+    },
+    {
+      "epoch": 1.1256625064113523,
+      "grad_norm": 0.3039405643939972,
+      "learning_rate": 8.140631018455664e-05,
+      "loss": 0.4657,
+      "step": 823
+    },
+    {
+      "epoch": 1.1270302615831767,
+      "grad_norm": 0.36968639492988586,
+      "learning_rate": 8.11939100001191e-05,
+      "loss": 0.5455,
+      "step": 824
+    },
+    {
+      "epoch": 1.1283980167550007,
+      "grad_norm": 0.27475252747535706,
+      "learning_rate": 8.098159773146639e-05,
+      "loss": 0.4267,
+      "step": 825
+    },
+    {
+      "epoch": 1.129765771926825,
+      "grad_norm": 0.29142552614212036,
+      "learning_rate": 8.07693743711282e-05,
+      "loss": 0.3276,
+      "step": 826
+    },
+    {
+      "epoch": 1.1311335270986493,
+      "grad_norm": 0.282996267080307,
+      "learning_rate": 8.055724091121826e-05,
+      "loss": 0.4624,
+      "step": 827
+    },
+    {
+      "epoch": 1.1325012822704736,
+      "grad_norm": 0.24529734253883362,
+      "learning_rate": 8.034519834343033e-05,
+      "loss": 0.5003,
+      "step": 828
+    },
+    {
+      "epoch": 1.133869037442298,
+      "grad_norm": 0.2863565683364868,
+      "learning_rate": 8.013324765903305e-05,
+      "loss": 0.552,
+      "step": 829
+    },
+    {
+      "epoch": 1.135236792614122,
+      "grad_norm": 0.2725461423397064,
+      "learning_rate": 7.992138984886567e-05,
+      "loss": 0.5248,
+      "step": 830
+    },
+    {
+      "epoch": 1.1366045477859463,
+      "grad_norm": 0.33715713024139404,
+      "learning_rate": 7.970962590333316e-05,
+      "loss": 0.6072,
+      "step": 831
+    },
+    {
+      "epoch": 1.1379723029577706,
+      "grad_norm": 0.26902320981025696,
+      "learning_rate": 7.949795681240168e-05,
+      "loss": 0.4953,
+      "step": 832
+    },
+    {
+      "epoch": 1.1393400581295947,
+      "grad_norm": 0.2683982849121094,
+      "learning_rate": 7.928638356559407e-05,
+      "loss": 0.6736,
+      "step": 833
+    },
+    {
+      "epoch": 1.140707813301419,
+      "grad_norm": 0.32076001167297363,
+      "learning_rate": 7.9074907151985e-05,
+      "loss": 0.5829,
+      "step": 834
+    },
+    {
+      "epoch": 1.1420755684732433,
+      "grad_norm": 0.29337945580482483,
+      "learning_rate": 7.886352856019653e-05,
+      "loss": 0.4851,
+      "step": 835
+    },
+    {
+      "epoch": 1.1434433236450676,
+      "grad_norm": 0.24045072495937347,
+      "learning_rate": 7.865224877839333e-05,
+      "loss": 0.3578,
+      "step": 836
+    },
+    {
+      "epoch": 1.1448110788168917,
+      "grad_norm": 0.257567822933197,
+      "learning_rate": 7.844106879427829e-05,
+      "loss": 0.4057,
+      "step": 837
+    },
+    {
+      "epoch": 1.146178833988716,
+      "grad_norm": 0.2878836989402771,
+      "learning_rate": 7.82299895950876e-05,
+      "loss": 0.4812,
+      "step": 838
+    },
+    {
+      "epoch": 1.1475465891605403,
+      "grad_norm": 0.3263901174068451,
+      "learning_rate": 7.801901216758642e-05,
+      "loss": 0.6451,
+      "step": 839
+    },
+    {
+      "epoch": 1.1489143443323644,
+      "grad_norm": 0.25824493169784546,
+      "learning_rate": 7.780813749806407e-05,
+      "loss": 0.4484,
+      "step": 840
+    },
+    {
+      "epoch": 1.1502820995041887,
+      "grad_norm": 0.25362029671669006,
+      "learning_rate": 7.759736657232951e-05,
+      "loss": 0.4383,
+      "step": 841
+    },
+    {
+      "epoch": 1.151649854676013,
+      "grad_norm": 0.3550529479980469,
+      "learning_rate": 7.73867003757067e-05,
+      "loss": 0.5098,
+      "step": 842
+    },
+    {
+      "epoch": 1.1530176098478373,
+      "grad_norm": 0.3048471510410309,
+      "learning_rate": 7.717613989303011e-05,
+      "loss": 0.5214,
+      "step": 843
+    },
+    {
+      "epoch": 1.1543853650196616,
+      "grad_norm": 0.29699233174324036,
+      "learning_rate": 7.696568610863978e-05,
+      "loss": 0.4679,
+      "step": 844
+    },
+    {
+      "epoch": 1.1557531201914857,
+      "grad_norm": 0.29977262020111084,
+      "learning_rate": 7.675534000637724e-05,
+      "loss": 0.4834,
+      "step": 845
+    },
+    {
+      "epoch": 1.15712087536331,
+      "grad_norm": 0.3024733066558838,
+      "learning_rate": 7.654510256958039e-05,
+      "loss": 0.5326,
+      "step": 846
+    },
+    {
+      "epoch": 1.1584886305351343,
+      "grad_norm": 0.3622778058052063,
+      "learning_rate": 7.633497478107925e-05,
+      "loss": 0.6334,
+      "step": 847
+    },
+    {
+      "epoch": 1.1598563857069584,
+      "grad_norm": 0.3505115509033203,
+      "learning_rate": 7.612495762319121e-05,
+      "loss": 0.4456,
+      "step": 848
+    },
+    {
+      "epoch": 1.1612241408787827,
+      "grad_norm": 0.30360838770866394,
+      "learning_rate": 7.59150520777165e-05,
+      "loss": 0.4395,
+      "step": 849
+    },
+    {
+      "epoch": 1.162591896050607,
+      "grad_norm": 0.2924692630767822,
+      "learning_rate": 7.570525912593354e-05,
+      "loss": 0.4935,
+      "step": 850
+    },
+    {
+      "epoch": 1.1639596512224313,
+      "grad_norm": 0.30422306060791016,
+      "learning_rate": 7.549557974859446e-05,
+      "loss": 0.5371,
+      "step": 851
+    },
+    {
+      "epoch": 1.1653274063942554,
+      "grad_norm": 0.3041650354862213,
+      "learning_rate": 7.528601492592037e-05,
+      "loss": 0.4409,
+      "step": 852
+    },
+    {
+      "epoch": 1.1666951615660797,
+      "grad_norm": 0.29774245619773865,
+      "learning_rate": 7.507656563759691e-05,
+      "loss": 0.5018,
+      "step": 853
+    },
+    {
+      "epoch": 1.168062916737904,
+      "grad_norm": 0.3011612892150879,
+      "learning_rate": 7.486723286276953e-05,
+      "loss": 0.4171,
+      "step": 854
+    },
+    {
+      "epoch": 1.169430671909728,
+      "grad_norm": 0.2770425081253052,
+      "learning_rate": 7.465801758003918e-05,
+      "loss": 0.3773,
+      "step": 855
+    },
+    {
+      "epoch": 1.1707984270815524,
+      "grad_norm": 0.2873842716217041,
+      "learning_rate": 7.444892076745728e-05,
+      "loss": 0.5557,
+      "step": 856
+    },
+    {
+      "epoch": 1.1721661822533767,
+      "grad_norm": 0.2865909934043884,
+      "learning_rate": 7.423994340252169e-05,
+      "loss": 0.39,
+      "step": 857
+    },
+    {
+      "epoch": 1.173533937425201,
+      "grad_norm": 0.2772740423679352,
+      "learning_rate": 7.403108646217165e-05,
+      "loss": 0.5739,
+      "step": 858
+    },
+    {
+      "epoch": 1.174901692597025,
+      "grad_norm": 0.3060879409313202,
+      "learning_rate": 7.382235092278359e-05,
+      "loss": 0.5719,
+      "step": 859
+    },
+    {
+      "epoch": 1.1762694477688493,
+      "grad_norm": 0.33835381269454956,
+      "learning_rate": 7.361373776016631e-05,
+      "loss": 0.536,
+      "step": 860
+    },
+    {
+      "epoch": 1.1776372029406736,
+      "grad_norm": 0.28969088196754456,
+      "learning_rate": 7.340524794955659e-05,
+      "loss": 0.4098,
+      "step": 861
+    },
+    {
+      "epoch": 1.179004958112498,
+      "grad_norm": 0.29033181071281433,
+      "learning_rate": 7.319688246561447e-05,
+      "loss": 0.5777,
+      "step": 862
+    },
+    {
+      "epoch": 1.180372713284322,
+      "grad_norm": 0.3036806583404541,
+      "learning_rate": 7.298864228241887e-05,
+      "loss": 0.4617,
+      "step": 863
+    },
+    {
+      "epoch": 1.1817404684561463,
+      "grad_norm": 0.313621461391449,
+      "learning_rate": 7.278052837346288e-05,
+      "loss": 0.6076,
+      "step": 864
+    },
+    {
+      "epoch": 1.1831082236279706,
+      "grad_norm": 0.27229294180870056,
+      "learning_rate": 7.257254171164932e-05,
+      "loss": 0.4272,
+      "step": 865
+    },
+    {
+      "epoch": 1.184475978799795,
+      "grad_norm": 0.308609277009964,
+      "learning_rate": 7.236468326928612e-05,
+      "loss": 0.5481,
+      "step": 866
+    },
+    {
+      "epoch": 1.185843733971619,
+      "grad_norm": 0.29016566276550293,
+      "learning_rate": 7.21569540180818e-05,
+      "loss": 0.4163,
+      "step": 867
+    },
+    {
+      "epoch": 1.1872114891434433,
+      "grad_norm": 0.33708828687667847,
+      "learning_rate": 7.1949354929141e-05,
+      "loss": 0.5613,
+      "step": 868
+    },
+    {
+      "epoch": 1.1885792443152676,
+      "grad_norm": 0.32777532935142517,
+      "learning_rate": 7.174188697295971e-05,
+      "loss": 0.4931,
+      "step": 869
+    },
+    {
+      "epoch": 1.1899469994870917,
+      "grad_norm": 0.2704114615917206,
+      "learning_rate": 7.153455111942108e-05,
+      "loss": 0.4251,
+      "step": 870
+    },
+    {
+      "epoch": 1.191314754658916,
+      "grad_norm": 0.3016801178455353,
+      "learning_rate": 7.132734833779054e-05,
+      "loss": 0.4761,
+      "step": 871
+    },
+    {
+      "epoch": 1.1926825098307403,
+      "grad_norm": 0.31739842891693115,
+      "learning_rate": 7.112027959671162e-05,
+      "loss": 0.5516,
+      "step": 872
+    },
+    {
+      "epoch": 1.1940502650025646,
+      "grad_norm": 0.3556269705295563,
+      "learning_rate": 7.091334586420097e-05,
+      "loss": 0.5474,
+      "step": 873
+    },
+    {
+      "epoch": 1.1954180201743887,
+      "grad_norm": 0.31290698051452637,
+      "learning_rate": 7.070654810764438e-05,
+      "loss": 0.5217,
+      "step": 874
+    },
+    {
+      "epoch": 1.196785775346213,
+      "grad_norm": 0.2770576775074005,
+      "learning_rate": 7.049988729379168e-05,
+      "loss": 0.4103,
+      "step": 875
+    },
+    {
+      "epoch": 1.1981535305180373,
+      "grad_norm": 0.2821738123893738,
+      "learning_rate": 7.029336438875279e-05,
+      "loss": 0.5438,
+      "step": 876
+    },
+    {
+      "epoch": 1.1995212856898616,
+      "grad_norm": 0.3358316123485565,
+      "learning_rate": 7.008698035799268e-05,
+      "loss": 0.8765,
+      "step": 877
+    },
+    {
+      "epoch": 1.2008890408616857,
+      "grad_norm": 0.31030896306037903,
+      "learning_rate": 6.988073616632732e-05,
+      "loss": 0.5541,
+      "step": 878
+    },
+    {
+      "epoch": 1.20225679603351,
+      "grad_norm": 0.30155354738235474,
+      "learning_rate": 6.967463277791879e-05,
+      "loss": 0.4087,
+      "step": 879
+    },
+    {
+      "epoch": 1.2036245512053343,
+      "grad_norm": 0.2397313416004181,
+      "learning_rate": 6.946867115627103e-05,
+      "loss": 0.4809,
+      "step": 880
+    },
+    {
+      "epoch": 1.2049923063771586,
+      "grad_norm": 0.2920851707458496,
+      "learning_rate": 6.926285226422515e-05,
+      "loss": 0.5256,
+      "step": 881
+    },
+    {
+      "epoch": 1.2063600615489827,
+      "grad_norm": 0.2639416754245758,
+      "learning_rate": 6.905717706395516e-05,
+      "loss": 0.4735,
+      "step": 882
+    },
+    {
+      "epoch": 1.207727816720807,
+      "grad_norm": 0.32958149909973145,
+      "learning_rate": 6.885164651696317e-05,
+      "loss": 0.5341,
+      "step": 883
+    },
+    {
+      "epoch": 1.2090955718926313,
+      "grad_norm": 0.2795344591140747,
+      "learning_rate": 6.86462615840752e-05,
+      "loss": 0.4619,
+      "step": 884
+    },
+    {
+      "epoch": 1.2104633270644554,
+      "grad_norm": 0.2936275005340576,
+      "learning_rate": 6.844102322543638e-05,
+      "loss": 0.4208,
+      "step": 885
+    },
+    {
+      "epoch": 1.2118310822362797,
+      "grad_norm": 0.4172208905220032,
+      "learning_rate": 6.823593240050685e-05,
+      "loss": 0.6143,
+      "step": 886
+    },
+    {
+      "epoch": 1.213198837408104,
+      "grad_norm": 0.2924855351448059,
+      "learning_rate": 6.80309900680568e-05,
+      "loss": 0.403,
+      "step": 887
+    },
+    {
+      "epoch": 1.2145665925799283,
+      "grad_norm": 0.30166590213775635,
+      "learning_rate": 6.782619718616252e-05,
+      "loss": 0.7077,
+      "step": 888
+    },
+    {
+      "epoch": 1.2159343477517524,
+      "grad_norm": 0.26507216691970825,
+      "learning_rate": 6.762155471220131e-05,
+      "loss": 0.4196,
+      "step": 889
+    },
+    {
+      "epoch": 1.2173021029235767,
+      "grad_norm": 0.2820349931716919,
+      "learning_rate": 6.741706360284763e-05,
+      "loss": 0.5192,
+      "step": 890
+    },
+    {
+      "epoch": 1.218669858095401,
+      "grad_norm": 0.2856913208961487,
+      "learning_rate": 6.721272481406817e-05,
+      "loss": 0.5337,
+      "step": 891
+    },
+    {
+      "epoch": 1.2200376132672253,
+      "grad_norm": 0.3205767273902893,
+      "learning_rate": 6.70085393011176e-05,
+      "loss": 0.3494,
+      "step": 892
+    },
+    {
+      "epoch": 1.2214053684390493,
+      "grad_norm": 0.3018949031829834,
+      "learning_rate": 6.6804508018534e-05,
+      "loss": 0.35,
+      "step": 893
+    },
+    {
+      "epoch": 1.2227731236108736,
+      "grad_norm": 0.3356417119503021,
+      "learning_rate": 6.660063192013451e-05,
+      "loss": 0.4085,
+      "step": 894
+    },
+    {
+      "epoch": 1.224140878782698,
+      "grad_norm": 0.31260353326797485,
+      "learning_rate": 6.639691195901076e-05,
+      "loss": 0.3837,
+      "step": 895
+    },
+    {
+      "epoch": 1.2255086339545223,
+      "grad_norm": 0.2612607181072235,
+      "learning_rate": 6.619334908752447e-05,
+      "loss": 0.4347,
+      "step": 896
+    },
+    {
+      "epoch": 1.2268763891263463,
+      "grad_norm": 0.26736879348754883,
+      "learning_rate": 6.598994425730299e-05,
+      "loss": 0.537,
+      "step": 897
+    },
+    {
+      "epoch": 1.2282441442981706,
+      "grad_norm": 0.2847106158733368,
+      "learning_rate": 6.578669841923486e-05,
+      "loss": 0.5361,
+      "step": 898
+    },
+    {
+      "epoch": 1.229611899469995,
+      "grad_norm": 0.254794716835022,
+      "learning_rate": 6.558361252346532e-05,
+      "loss": 0.454,
+      "step": 899
+    },
+    {
+      "epoch": 1.230979654641819,
+      "grad_norm": 0.27473878860473633,
+      "learning_rate": 6.5380687519392e-05,
+      "loss": 0.5392,
+      "step": 900
+    },
+    {
+      "epoch": 1.230979654641819,
+      "eval_loss": 0.5753397941589355,
+      "eval_runtime": 38.7964,
+      "eval_samples_per_second": 31.756,
+      "eval_steps_per_second": 7.939,
+      "step": 900
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.57057378726314e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null