Training in progress, step 1200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77f48100db5f5032f23d7276a01f964e6643d1b32dc34750c01745f7a6444537
 size 35237104

 version https://git-lfs.github.com/spec/v1
+oid sha256:21f1f3acda76ad4e88a7c83adc2e48f1ee2adc3335201431003f84c84136a9cb
 size 35237104

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a057d7537543d26b8405f1b3a06913f95079cb0260e6e77cb6e67aff79016b59
 size 18810356

 version https://git-lfs.github.com/spec/v1
+oid sha256:127f582c7769f6b1fec4db6c3c889c96511b276e8258d33333afdb44ee48b79a
 size 18810356

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55efe0fdedeb3aabf3711868667a8b9d1adcffe6440d9e5347d8fa8bfb07e987
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ea4a928811810f84d0f8b5936c44f2709f0f2cadb34cf4fa9a714f98b7ed5c0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e9b96617578bfc230d7a81fb4950d7c67ffa18c14dffcf3189f0dd3ecbd4b36d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b30780423ff45c94e8ad2d6a438363a868ec26f078372884947da902f24979de
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.8903758525848389,
   "best_model_checkpoint": "miner_id_24/checkpoint-1000",
-  "epoch": 1.0793572917944314,
   "eval_steps": 100,
-  "global_step": 1100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -7803,6 +7803,714 @@
       "eval_samples_per_second": 58.681,
       "eval_steps_per_second": 14.681,
       "step": 1100
     }
   ],
   "logging_steps": 1,
@@ -7817,7 +8525,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -7826,12 +8534,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.929821414424576e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.8903758525848389,
   "best_model_checkpoint": "miner_id_24/checkpoint-1000",
+  "epoch": 1.1774806819575616,
   "eval_steps": 100,
+  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 58.681,
       "eval_steps_per_second": 14.681,
       "step": 1100
+    },
+    {
+      "epoch": 1.0803385256960627,
+      "grad_norm": 2.1303536891937256,
+      "learning_rate": 8.818283612384952e-05,
+      "loss": 1.6892,
+      "step": 1101
+    },
+    {
+      "epoch": 1.081319759597694,
+      "grad_norm": 2.003086566925049,
+      "learning_rate": 8.80291007154402e-05,
+      "loss": 1.5528,
+      "step": 1102
+    },
+    {
+      "epoch": 1.0823009934993255,
+      "grad_norm": 2.045468807220459,
+      "learning_rate": 8.787539400573867e-05,
+      "loss": 1.677,
+      "step": 1103
+    },
+    {
+      "epoch": 1.0832822274009568,
+      "grad_norm": 1.911670446395874,
+      "learning_rate": 8.772171636323714e-05,
+      "loss": 1.6431,
+      "step": 1104
+    },
+    {
+      "epoch": 1.084263461302588,
+      "grad_norm": 2.1892030239105225,
+      "learning_rate": 8.75680681563582e-05,
+      "loss": 1.7828,
+      "step": 1105
+    },
+    {
+      "epoch": 1.0852446952042194,
+      "grad_norm": 1.965929627418518,
+      "learning_rate": 8.74144497534539e-05,
+      "loss": 1.6294,
+      "step": 1106
+    },
+    {
+      "epoch": 1.0862259291058507,
+      "grad_norm": 2.0876922607421875,
+      "learning_rate": 8.726086152280483e-05,
+      "loss": 1.6393,
+      "step": 1107
+    },
+    {
+      "epoch": 1.087207163007482,
+      "grad_norm": 2.0882608890533447,
+      "learning_rate": 8.710730383261916e-05,
+      "loss": 1.7047,
+      "step": 1108
+    },
+    {
+      "epoch": 1.0881883969091133,
+      "grad_norm": 1.7960344552993774,
+      "learning_rate": 8.695377705103199e-05,
+      "loss": 1.4743,
+      "step": 1109
+    },
+    {
+      "epoch": 1.0891696308107446,
+      "grad_norm": 2.054779291152954,
+      "learning_rate": 8.680028154610419e-05,
+      "loss": 1.8039,
+      "step": 1110
+    },
+    {
+      "epoch": 1.0901508647123759,
+      "grad_norm": 1.9848898649215698,
+      "learning_rate": 8.66468176858217e-05,
+      "loss": 1.6247,
+      "step": 1111
+    },
+    {
+      "epoch": 1.0911320986140072,
+      "grad_norm": 1.876171350479126,
+      "learning_rate": 8.649338583809466e-05,
+      "loss": 1.4708,
+      "step": 1112
+    },
+    {
+      "epoch": 1.0921133325156385,
+      "grad_norm": 2.000190019607544,
+      "learning_rate": 8.633998637075634e-05,
+      "loss": 1.6452,
+      "step": 1113
+    },
+    {
+      "epoch": 1.0930945664172698,
+      "grad_norm": 1.9362976551055908,
+      "learning_rate": 8.618661965156243e-05,
+      "loss": 1.5954,
+      "step": 1114
+    },
+    {
+      "epoch": 1.094075800318901,
+      "grad_norm": 2.1642391681671143,
+      "learning_rate": 8.60332860481902e-05,
+      "loss": 1.5963,
+      "step": 1115
+    },
+    {
+      "epoch": 1.0950570342205324,
+      "grad_norm": 1.9460209608078003,
+      "learning_rate": 8.587998592823738e-05,
+      "loss": 1.5983,
+      "step": 1116
+    },
+    {
+      "epoch": 1.0960382681221637,
+      "grad_norm": 2.0492846965789795,
+      "learning_rate": 8.572671965922148e-05,
+      "loss": 1.7892,
+      "step": 1117
+    },
+    {
+      "epoch": 1.097019502023795,
+      "grad_norm": 1.9556766748428345,
+      "learning_rate": 8.557348760857899e-05,
+      "loss": 1.398,
+      "step": 1118
+    },
+    {
+      "epoch": 1.0980007359254262,
+      "grad_norm": 2.0801281929016113,
+      "learning_rate": 8.542029014366419e-05,
+      "loss": 1.6695,
+      "step": 1119
+    },
+    {
+      "epoch": 1.0989819698270575,
+      "grad_norm": 1.9107387065887451,
+      "learning_rate": 8.526712763174844e-05,
+      "loss": 1.5044,
+      "step": 1120
+    },
+    {
+      "epoch": 1.0999632037286888,
+      "grad_norm": 2.10068416595459,
+      "learning_rate": 8.511400044001948e-05,
+      "loss": 1.7394,
+      "step": 1121
+    },
+    {
+      "epoch": 1.1009444376303201,
+      "grad_norm": 1.9681599140167236,
+      "learning_rate": 8.496090893558019e-05,
+      "loss": 1.5235,
+      "step": 1122
+    },
+    {
+      "epoch": 1.1019256715319514,
+      "grad_norm": 1.798728108406067,
+      "learning_rate": 8.480785348544802e-05,
+      "loss": 1.4747,
+      "step": 1123
+    },
+    {
+      "epoch": 1.1029069054335827,
+      "grad_norm": 2.044416666030884,
+      "learning_rate": 8.465483445655394e-05,
+      "loss": 1.7187,
+      "step": 1124
+    },
+    {
+      "epoch": 1.103888139335214,
+      "grad_norm": 1.8914976119995117,
+      "learning_rate": 8.450185221574152e-05,
+      "loss": 1.5393,
+      "step": 1125
+    },
+    {
+      "epoch": 1.1048693732368453,
+      "grad_norm": 2.0890233516693115,
+      "learning_rate": 8.434890712976632e-05,
+      "loss": 1.5352,
+      "step": 1126
+    },
+    {
+      "epoch": 1.1058506071384766,
+      "grad_norm": 2.081516742706299,
+      "learning_rate": 8.419599956529466e-05,
+      "loss": 1.7519,
+      "step": 1127
+    },
+    {
+      "epoch": 1.106831841040108,
+      "grad_norm": 1.9843311309814453,
+      "learning_rate": 8.404312988890299e-05,
+      "loss": 1.6323,
+      "step": 1128
+    },
+    {
+      "epoch": 1.1078130749417392,
+      "grad_norm": 2.055711269378662,
+      "learning_rate": 8.389029846707688e-05,
+      "loss": 1.6937,
+      "step": 1129
+    },
+    {
+      "epoch": 1.1087943088433705,
+      "grad_norm": 2.023723840713501,
+      "learning_rate": 8.373750566621027e-05,
+      "loss": 1.5469,
+      "step": 1130
+    },
+    {
+      "epoch": 1.1097755427450018,
+      "grad_norm": 1.9585527181625366,
+      "learning_rate": 8.358475185260438e-05,
+      "loss": 1.5684,
+      "step": 1131
+    },
+    {
+      "epoch": 1.110756776646633,
+      "grad_norm": 1.8710687160491943,
+      "learning_rate": 8.343203739246712e-05,
+      "loss": 1.4615,
+      "step": 1132
+    },
+    {
+      "epoch": 1.1117380105482644,
+      "grad_norm": 2.2277400493621826,
+      "learning_rate": 8.32793626519119e-05,
+      "loss": 1.7425,
+      "step": 1133
+    },
+    {
+      "epoch": 1.1127192444498957,
+      "grad_norm": 2.5818541049957275,
+      "learning_rate": 8.312672799695702e-05,
+      "loss": 1.557,
+      "step": 1134
+    },
+    {
+      "epoch": 1.113700478351527,
+      "grad_norm": 1.8735686540603638,
+      "learning_rate": 8.297413379352469e-05,
+      "loss": 1.5419,
+      "step": 1135
+    },
+    {
+      "epoch": 1.1146817122531583,
+      "grad_norm": 2.1299853324890137,
+      "learning_rate": 8.282158040744003e-05,
+      "loss": 1.5744,
+      "step": 1136
+    },
+    {
+      "epoch": 1.1156629461547896,
+      "grad_norm": 2.173092842102051,
+      "learning_rate": 8.266906820443036e-05,
+      "loss": 1.5719,
+      "step": 1137
+    },
+    {
+      "epoch": 1.1166441800564209,
+      "grad_norm": 2.098710775375366,
+      "learning_rate": 8.251659755012435e-05,
+      "loss": 1.7516,
+      "step": 1138
+    },
+    {
+      "epoch": 1.1176254139580521,
+      "grad_norm": 2.151813507080078,
+      "learning_rate": 8.236416881005093e-05,
+      "loss": 1.8084,
+      "step": 1139
+    },
+    {
+      "epoch": 1.1186066478596834,
+      "grad_norm": 2.144266366958618,
+      "learning_rate": 8.22117823496386e-05,
+      "loss": 1.673,
+      "step": 1140
+    },
+    {
+      "epoch": 1.1195878817613147,
+      "grad_norm": 2.0415525436401367,
+      "learning_rate": 8.205943853421457e-05,
+      "loss": 1.6424,
+      "step": 1141
+    },
+    {
+      "epoch": 1.1205691156629463,
+      "grad_norm": 2.1223199367523193,
+      "learning_rate": 8.190713772900374e-05,
+      "loss": 1.7333,
+      "step": 1142
+    },
+    {
+      "epoch": 1.1215503495645776,
+      "grad_norm": 1.88699471950531,
+      "learning_rate": 8.175488029912783e-05,
+      "loss": 1.5709,
+      "step": 1143
+    },
+    {
+      "epoch": 1.1225315834662088,
+      "grad_norm": 2.1912736892700195,
+      "learning_rate": 8.160266660960472e-05,
+      "loss": 1.8098,
+      "step": 1144
+    },
+    {
+      "epoch": 1.1235128173678401,
+      "grad_norm": 2.0326104164123535,
+      "learning_rate": 8.14504970253474e-05,
+      "loss": 1.6522,
+      "step": 1145
+    },
+    {
+      "epoch": 1.1244940512694714,
+      "grad_norm": 1.9498326778411865,
+      "learning_rate": 8.129837191116298e-05,
+      "loss": 1.5352,
+      "step": 1146
+    },
+    {
+      "epoch": 1.1254752851711027,
+      "grad_norm": 1.959389567375183,
+      "learning_rate": 8.114629163175215e-05,
+      "loss": 1.5821,
+      "step": 1147
+    },
+    {
+      "epoch": 1.126456519072734,
+      "grad_norm": 1.8568264245986938,
+      "learning_rate": 8.099425655170801e-05,
+      "loss": 1.4137,
+      "step": 1148
+    },
+    {
+      "epoch": 1.1274377529743653,
+      "grad_norm": 1.8849796056747437,
+      "learning_rate": 8.084226703551528e-05,
+      "loss": 1.563,
+      "step": 1149
+    },
+    {
+      "epoch": 1.1284189868759966,
+      "grad_norm": 2.0412163734436035,
+      "learning_rate": 8.06903234475495e-05,
+      "loss": 1.5691,
+      "step": 1150
+    },
+    {
+      "epoch": 1.129400220777628,
+      "grad_norm": 1.98798406124115,
+      "learning_rate": 8.053842615207615e-05,
+      "loss": 1.7514,
+      "step": 1151
+    },
+    {
+      "epoch": 1.1303814546792592,
+      "grad_norm": 2.258009433746338,
+      "learning_rate": 8.038657551324955e-05,
+      "loss": 1.8773,
+      "step": 1152
+    },
+    {
+      "epoch": 1.1313626885808905,
+      "grad_norm": 1.8106105327606201,
+      "learning_rate": 8.02347718951124e-05,
+      "loss": 1.5775,
+      "step": 1153
+    },
+    {
+      "epoch": 1.1323439224825218,
+      "grad_norm": 2.030728340148926,
+      "learning_rate": 8.008301566159447e-05,
+      "loss": 1.7025,
+      "step": 1154
+    },
+    {
+      "epoch": 1.133325156384153,
+      "grad_norm": 2.0498745441436768,
+      "learning_rate": 7.993130717651207e-05,
+      "loss": 1.7402,
+      "step": 1155
+    },
+    {
+      "epoch": 1.1343063902857844,
+      "grad_norm": 2.068598747253418,
+      "learning_rate": 7.977964680356696e-05,
+      "loss": 1.7556,
+      "step": 1156
+    },
+    {
+      "epoch": 1.1352876241874157,
+      "grad_norm": 1.9342832565307617,
+      "learning_rate": 7.962803490634563e-05,
+      "loss": 1.6636,
+      "step": 1157
+    },
+    {
+      "epoch": 1.136268858089047,
+      "grad_norm": 1.9978430271148682,
+      "learning_rate": 7.947647184831824e-05,
+      "loss": 1.49,
+      "step": 1158
+    },
+    {
+      "epoch": 1.1372500919906783,
+      "grad_norm": 2.0596561431884766,
+      "learning_rate": 7.932495799283801e-05,
+      "loss": 1.5796,
+      "step": 1159
+    },
+    {
+      "epoch": 1.1382313258923096,
+      "grad_norm": 1.8670997619628906,
+      "learning_rate": 7.917349370314007e-05,
+      "loss": 1.4911,
+      "step": 1160
+    },
+    {
+      "epoch": 1.1392125597939409,
+      "grad_norm": 1.8702526092529297,
+      "learning_rate": 7.902207934234078e-05,
+      "loss": 1.6028,
+      "step": 1161
+    },
+    {
+      "epoch": 1.1401937936955722,
+      "grad_norm": 2.011188268661499,
+      "learning_rate": 7.887071527343687e-05,
+      "loss": 1.7725,
+      "step": 1162
+    },
+    {
+      "epoch": 1.1411750275972035,
+      "grad_norm": 2.2111904621124268,
+      "learning_rate": 7.871940185930438e-05,
+      "loss": 1.9471,
+      "step": 1163
+    },
+    {
+      "epoch": 1.1421562614988348,
+      "grad_norm": 3.008547306060791,
+      "learning_rate": 7.856813946269795e-05,
+      "loss": 1.6018,
+      "step": 1164
+    },
+    {
+      "epoch": 1.143137495400466,
+      "grad_norm": 2.02215313911438,
+      "learning_rate": 7.841692844624999e-05,
+      "loss": 1.7439,
+      "step": 1165
+    },
+    {
+      "epoch": 1.1441187293020973,
+      "grad_norm": 1.9515390396118164,
+      "learning_rate": 7.826576917246961e-05,
+      "loss": 1.4803,
+      "step": 1166
+    },
+    {
+      "epoch": 1.1450999632037286,
+      "grad_norm": 2.102198362350464,
+      "learning_rate": 7.811466200374194e-05,
+      "loss": 1.8823,
+      "step": 1167
+    },
+    {
+      "epoch": 1.14608119710536,
+      "grad_norm": 2.1199088096618652,
+      "learning_rate": 7.796360730232724e-05,
+      "loss": 1.7533,
+      "step": 1168
+    },
+    {
+      "epoch": 1.1470624310069912,
+      "grad_norm": 1.9005942344665527,
+      "learning_rate": 7.78126054303599e-05,
+      "loss": 1.5241,
+      "step": 1169
+    },
+    {
+      "epoch": 1.1480436649086225,
+      "grad_norm": 1.9489630460739136,
+      "learning_rate": 7.766165674984766e-05,
+      "loss": 1.6332,
+      "step": 1170
+    },
+    {
+      "epoch": 1.1490248988102538,
+      "grad_norm": 1.9742125272750854,
+      "learning_rate": 7.751076162267086e-05,
+      "loss": 1.5182,
+      "step": 1171
+    },
+    {
+      "epoch": 1.1500061327118851,
+      "grad_norm": 2.1530141830444336,
+      "learning_rate": 7.735992041058126e-05,
+      "loss": 1.7049,
+      "step": 1172
+    },
+    {
+      "epoch": 1.1509873666135164,
+      "grad_norm": 1.9191465377807617,
+      "learning_rate": 7.720913347520154e-05,
+      "loss": 1.6947,
+      "step": 1173
+    },
+    {
+      "epoch": 1.1519686005151477,
+      "grad_norm": 2.124904155731201,
+      "learning_rate": 7.705840117802418e-05,
+      "loss": 1.4705,
+      "step": 1174
+    },
+    {
+      "epoch": 1.152949834416779,
+      "grad_norm": 2.041997194290161,
+      "learning_rate": 7.69077238804107e-05,
+      "loss": 1.6705,
+      "step": 1175
+    },
+    {
+      "epoch": 1.1539310683184105,
+      "grad_norm": 2.099247694015503,
+      "learning_rate": 7.675710194359069e-05,
+      "loss": 1.8343,
+      "step": 1176
+    },
+    {
+      "epoch": 1.1549123022200418,
+      "grad_norm": 2.0407588481903076,
+      "learning_rate": 7.660653572866112e-05,
+      "loss": 1.4873,
+      "step": 1177
+    },
+    {
+      "epoch": 1.1558935361216731,
+      "grad_norm": 1.955540418624878,
+      "learning_rate": 7.645602559658533e-05,
+      "loss": 1.4963,
+      "step": 1178
+    },
+    {
+      "epoch": 1.1568747700233044,
+      "grad_norm": 2.0383074283599854,
+      "learning_rate": 7.630557190819217e-05,
+      "loss": 1.5029,
+      "step": 1179
+    },
+    {
+      "epoch": 1.1578560039249357,
+      "grad_norm": 1.9726423025131226,
+      "learning_rate": 7.61551750241753e-05,
+      "loss": 1.5796,
+      "step": 1180
+    },
+    {
+      "epoch": 1.158837237826567,
+      "grad_norm": 2.105414867401123,
+      "learning_rate": 7.600483530509204e-05,
+      "loss": 1.6223,
+      "step": 1181
+    },
+    {
+      "epoch": 1.1598184717281983,
+      "grad_norm": 1.842738151550293,
+      "learning_rate": 7.58545531113627e-05,
+      "loss": 1.5008,
+      "step": 1182
+    },
+    {
+      "epoch": 1.1607997056298296,
+      "grad_norm": 1.8421316146850586,
+      "learning_rate": 7.57043288032698e-05,
+      "loss": 1.5295,
+      "step": 1183
+    },
+    {
+      "epoch": 1.1617809395314609,
+      "grad_norm": 1.9387428760528564,
+      "learning_rate": 7.555416274095694e-05,
+      "loss": 1.6356,
+      "step": 1184
+    },
+    {
+      "epoch": 1.1627621734330922,
+      "grad_norm": 2.176913261413574,
+      "learning_rate": 7.540405528442822e-05,
+      "loss": 1.746,
+      "step": 1185
+    },
+    {
+      "epoch": 1.1637434073347235,
+      "grad_norm": 2.187565565109253,
+      "learning_rate": 7.525400679354712e-05,
+      "loss": 1.7062,
+      "step": 1186
+    },
+    {
+      "epoch": 1.1647246412363548,
+      "grad_norm": 2.3154940605163574,
+      "learning_rate": 7.510401762803575e-05,
+      "loss": 1.7859,
+      "step": 1187
+    },
+    {
+      "epoch": 1.165705875137986,
+      "grad_norm": 1.9886435270309448,
+      "learning_rate": 7.495408814747418e-05,
+      "loss": 1.5434,
+      "step": 1188
+    },
+    {
+      "epoch": 1.1666871090396174,
+      "grad_norm": 2.100955009460449,
+      "learning_rate": 7.480421871129914e-05,
+      "loss": 1.717,
+      "step": 1189
+    },
+    {
+      "epoch": 1.1676683429412487,
+      "grad_norm": 1.8335407972335815,
+      "learning_rate": 7.465440967880354e-05,
+      "loss": 1.3502,
+      "step": 1190
+    },
+    {
+      "epoch": 1.16864957684288,
+      "grad_norm": 2.223097085952759,
+      "learning_rate": 7.450466140913557e-05,
+      "loss": 1.8062,
+      "step": 1191
+    },
+    {
+      "epoch": 1.1696308107445113,
+      "grad_norm": 2.0861454010009766,
+      "learning_rate": 7.435497426129759e-05,
+      "loss": 1.6631,
+      "step": 1192
+    },
+    {
+      "epoch": 1.1706120446461425,
+      "grad_norm": 2.1116607189178467,
+      "learning_rate": 7.420534859414542e-05,
+      "loss": 1.5691,
+      "step": 1193
+    },
+    {
+      "epoch": 1.1715932785477738,
+      "grad_norm": 2.043426036834717,
+      "learning_rate": 7.405578476638768e-05,
+      "loss": 1.7083,
+      "step": 1194
+    },
+    {
+      "epoch": 1.1725745124494051,
+      "grad_norm": 1.9768962860107422,
+      "learning_rate": 7.390628313658457e-05,
+      "loss": 1.5393,
+      "step": 1195
+    },
+    {
+      "epoch": 1.1735557463510364,
+      "grad_norm": 1.9110056161880493,
+      "learning_rate": 7.375684406314715e-05,
+      "loss": 1.4994,
+      "step": 1196
+    },
+    {
+      "epoch": 1.1745369802526677,
+      "grad_norm": 2.3619277477264404,
+      "learning_rate": 7.360746790433672e-05,
+      "loss": 1.6535,
+      "step": 1197
+    },
+    {
+      "epoch": 1.175518214154299,
+      "grad_norm": 2.012925624847412,
+      "learning_rate": 7.345815501826353e-05,
+      "loss": 1.6035,
+      "step": 1198
+    },
+    {
+      "epoch": 1.1764994480559303,
+      "grad_norm": 2.2382020950317383,
+      "learning_rate": 7.330890576288619e-05,
+      "loss": 1.901,
+      "step": 1199
+    },
+    {
+      "epoch": 1.1774806819575616,
+      "grad_norm": 2.193420886993408,
+      "learning_rate": 7.315972049601086e-05,
+      "loss": 1.7189,
+      "step": 1200
+    },
+    {
+      "epoch": 1.1774806819575616,
+      "eval_loss": 1.8992936611175537,
+      "eval_runtime": 23.2051,
+      "eval_samples_per_second": 58.565,
+      "eval_steps_per_second": 14.652,
+      "step": 1200
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.650775715250176e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null