Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:460f05eb744d34bffeb9d19e824666f20e88afd559c776d265689098f45e605b
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:2522b09efa076968ced922c449d27907ada5b32883602e1e3e6aa9fe9469a936
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55d9ad367516a8337e81e182787df447c54393d8f694d96c7cc365bb3c4bdbf2
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:989007c27adb1a0904f90610ed91602b9ac4d2b1c2fa938749d43cd743b94987
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f801f35493f21aa7b6d4d83ff0bfcabdb11cc14fc16dfaff199f1babb88ccfc5
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:52f31a55dc0880e475a3dd7d264226b9d065f6dcd16f0fc90c5eed82bf024661
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01c5525f1d8420ca8a81a7fd2ec397a508131d03210dfd36c7ac5758b0e6313b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3e88ef6a2716260516e17223973d6a3b0a4c88bf12c72ed47e80e6f2a6782fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8841075897216797,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.3325942350332594,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 9.386,
       "eval_steps_per_second": 2.371,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.36624857350144e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8263623714447021,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.4434589800443459,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.386,
       "eval_steps_per_second": 2.371,
       "step": 150
+    },
+    {
+      "epoch": 0.3348115299334812,
+      "grad_norm": 9.736676216125488,
+      "learning_rate": 2.589263157894737e-05,
+      "loss": 1.6967,
+      "step": 151
+    },
+    {
+      "epoch": 0.3370288248337029,
+      "grad_norm": 10.335311889648438,
+      "learning_rate": 2.536421052631579e-05,
+      "loss": 1.6221,
+      "step": 152
+    },
+    {
+      "epoch": 0.3392461197339246,
+      "grad_norm": 10.948195457458496,
+      "learning_rate": 2.483578947368421e-05,
+      "loss": 1.891,
+      "step": 153
+    },
+    {
+      "epoch": 0.34146341463414637,
+      "grad_norm": 10.740894317626953,
+      "learning_rate": 2.430736842105263e-05,
+      "loss": 1.5576,
+      "step": 154
+    },
+    {
+      "epoch": 0.3436807095343681,
+      "grad_norm": 10.873139381408691,
+      "learning_rate": 2.3778947368421052e-05,
+      "loss": 1.5878,
+      "step": 155
+    },
+    {
+      "epoch": 0.3458980044345898,
+      "grad_norm": 14.580072402954102,
+      "learning_rate": 2.3250526315789473e-05,
+      "loss": 1.9496,
+      "step": 156
+    },
+    {
+      "epoch": 0.34811529933481156,
+      "grad_norm": 12.087544441223145,
+      "learning_rate": 2.2722105263157894e-05,
+      "loss": 1.8294,
+      "step": 157
+    },
+    {
+      "epoch": 0.35033259423503327,
+      "grad_norm": 14.1340913772583,
+      "learning_rate": 2.2193684210526316e-05,
+      "loss": 1.8763,
+      "step": 158
+    },
+    {
+      "epoch": 0.352549889135255,
+      "grad_norm": 11.832399368286133,
+      "learning_rate": 2.1665263157894737e-05,
+      "loss": 1.771,
+      "step": 159
+    },
+    {
+      "epoch": 0.35476718403547675,
+      "grad_norm": 14.125495910644531,
+      "learning_rate": 2.1136842105263158e-05,
+      "loss": 2.1433,
+      "step": 160
+    },
+    {
+      "epoch": 0.35698447893569846,
+      "grad_norm": 16.098318099975586,
+      "learning_rate": 2.060842105263158e-05,
+      "loss": 1.5098,
+      "step": 161
+    },
+    {
+      "epoch": 0.35920177383592017,
+      "grad_norm": 11.606100082397461,
+      "learning_rate": 2.008e-05,
+      "loss": 1.531,
+      "step": 162
+    },
+    {
+      "epoch": 0.3614190687361419,
+      "grad_norm": 14.126666069030762,
+      "learning_rate": 1.9551578947368422e-05,
+      "loss": 2.1956,
+      "step": 163
+    },
+    {
+      "epoch": 0.36363636363636365,
+      "grad_norm": 13.657944679260254,
+      "learning_rate": 1.9023157894736843e-05,
+      "loss": 2.0216,
+      "step": 164
+    },
+    {
+      "epoch": 0.36585365853658536,
+      "grad_norm": 16.890853881835938,
+      "learning_rate": 1.849473684210526e-05,
+      "loss": 2.0006,
+      "step": 165
+    },
+    {
+      "epoch": 0.36807095343680707,
+      "grad_norm": 12.908790588378906,
+      "learning_rate": 1.7966315789473686e-05,
+      "loss": 1.6173,
+      "step": 166
+    },
+    {
+      "epoch": 0.37028824833702884,
+      "grad_norm": 17.241039276123047,
+      "learning_rate": 1.7437894736842107e-05,
+      "loss": 1.9784,
+      "step": 167
+    },
+    {
+      "epoch": 0.37250554323725055,
+      "grad_norm": 15.207599639892578,
+      "learning_rate": 1.6909473684210525e-05,
+      "loss": 1.5988,
+      "step": 168
+    },
+    {
+      "epoch": 0.37472283813747226,
+      "grad_norm": 15.888737678527832,
+      "learning_rate": 1.638105263157895e-05,
+      "loss": 1.4648,
+      "step": 169
+    },
+    {
+      "epoch": 0.376940133037694,
+      "grad_norm": 12.339811325073242,
+      "learning_rate": 1.5852631578947368e-05,
+      "loss": 1.5447,
+      "step": 170
+    },
+    {
+      "epoch": 0.37915742793791574,
+      "grad_norm": 12.044404983520508,
+      "learning_rate": 1.532421052631579e-05,
+      "loss": 1.3339,
+      "step": 171
+    },
+    {
+      "epoch": 0.38137472283813745,
+      "grad_norm": 11.459868431091309,
+      "learning_rate": 1.4795789473684209e-05,
+      "loss": 1.4563,
+      "step": 172
+    },
+    {
+      "epoch": 0.3835920177383592,
+      "grad_norm": 14.248141288757324,
+      "learning_rate": 1.4267368421052632e-05,
+      "loss": 1.5893,
+      "step": 173
+    },
+    {
+      "epoch": 0.3858093126385809,
+      "grad_norm": 19.118242263793945,
+      "learning_rate": 1.3738947368421053e-05,
+      "loss": 1.9648,
+      "step": 174
+    },
+    {
+      "epoch": 0.38802660753880264,
+      "grad_norm": 15.491469383239746,
+      "learning_rate": 1.3210526315789473e-05,
+      "loss": 1.5081,
+      "step": 175
+    },
+    {
+      "epoch": 0.3902439024390244,
+      "grad_norm": 13.472190856933594,
+      "learning_rate": 1.2682105263157896e-05,
+      "loss": 1.2014,
+      "step": 176
+    },
+    {
+      "epoch": 0.3924611973392461,
+      "grad_norm": 16.698389053344727,
+      "learning_rate": 1.2153684210526315e-05,
+      "loss": 1.6185,
+      "step": 177
+    },
+    {
+      "epoch": 0.3946784922394678,
+      "grad_norm": 14.444684028625488,
+      "learning_rate": 1.1625263157894737e-05,
+      "loss": 1.663,
+      "step": 178
+    },
+    {
+      "epoch": 0.3968957871396896,
+      "grad_norm": 12.130651473999023,
+      "learning_rate": 1.1096842105263158e-05,
+      "loss": 1.1374,
+      "step": 179
+    },
+    {
+      "epoch": 0.3991130820399113,
+      "grad_norm": 13.900895118713379,
+      "learning_rate": 1.0568421052631579e-05,
+      "loss": 1.3243,
+      "step": 180
+    },
+    {
+      "epoch": 0.401330376940133,
+      "grad_norm": 15.653491020202637,
+      "learning_rate": 1.004e-05,
+      "loss": 1.7645,
+      "step": 181
+    },
+    {
+      "epoch": 0.4035476718403548,
+      "grad_norm": 15.8237886428833,
+      "learning_rate": 9.511578947368422e-06,
+      "loss": 1.6201,
+      "step": 182
+    },
+    {
+      "epoch": 0.4057649667405765,
+      "grad_norm": 32.045345306396484,
+      "learning_rate": 8.983157894736843e-06,
+      "loss": 1.7167,
+      "step": 183
+    },
+    {
+      "epoch": 0.4079822616407982,
+      "grad_norm": 13.554454803466797,
+      "learning_rate": 8.454736842105263e-06,
+      "loss": 1.1857,
+      "step": 184
+    },
+    {
+      "epoch": 0.41019955654101997,
+      "grad_norm": 13.749832153320312,
+      "learning_rate": 7.926315789473684e-06,
+      "loss": 1.0891,
+      "step": 185
+    },
+    {
+      "epoch": 0.4124168514412417,
+      "grad_norm": 16.378047943115234,
+      "learning_rate": 7.397894736842104e-06,
+      "loss": 1.244,
+      "step": 186
+    },
+    {
+      "epoch": 0.4146341463414634,
+      "grad_norm": 20.462013244628906,
+      "learning_rate": 6.8694736842105265e-06,
+      "loss": 1.659,
+      "step": 187
+    },
+    {
+      "epoch": 0.41685144124168516,
+      "grad_norm": 18.432472229003906,
+      "learning_rate": 6.341052631578948e-06,
+      "loss": 1.8659,
+      "step": 188
+    },
+    {
+      "epoch": 0.4190687361419069,
+      "grad_norm": 21.487030029296875,
+      "learning_rate": 5.812631578947368e-06,
+      "loss": 1.7793,
+      "step": 189
+    },
+    {
+      "epoch": 0.4212860310421286,
+      "grad_norm": 27.553062438964844,
+      "learning_rate": 5.2842105263157896e-06,
+      "loss": 2.1433,
+      "step": 190
+    },
+    {
+      "epoch": 0.42350332594235035,
+      "grad_norm": 20.972902297973633,
+      "learning_rate": 4.755789473684211e-06,
+      "loss": 1.3501,
+      "step": 191
+    },
+    {
+      "epoch": 0.42572062084257206,
+      "grad_norm": 21.430740356445312,
+      "learning_rate": 4.227368421052631e-06,
+      "loss": 1.2867,
+      "step": 192
+    },
+    {
+      "epoch": 0.4279379157427938,
+      "grad_norm": 21.622007369995117,
+      "learning_rate": 3.698947368421052e-06,
+      "loss": 1.8264,
+      "step": 193
+    },
+    {
+      "epoch": 0.43015521064301554,
+      "grad_norm": 18.00780487060547,
+      "learning_rate": 3.170526315789474e-06,
+      "loss": 1.5396,
+      "step": 194
+    },
+    {
+      "epoch": 0.43237250554323725,
+      "grad_norm": 21.456939697265625,
+      "learning_rate": 2.6421052631578948e-06,
+      "loss": 0.8305,
+      "step": 195
+    },
+    {
+      "epoch": 0.43458980044345896,
+      "grad_norm": 37.9657096862793,
+      "learning_rate": 2.1136842105263157e-06,
+      "loss": 2.433,
+      "step": 196
+    },
+    {
+      "epoch": 0.43680709534368073,
+      "grad_norm": 31.422948837280273,
+      "learning_rate": 1.585263157894737e-06,
+      "loss": 1.5014,
+      "step": 197
+    },
+    {
+      "epoch": 0.43902439024390244,
+      "grad_norm": 22.746129989624023,
+      "learning_rate": 1.0568421052631578e-06,
+      "loss": 1.9673,
+      "step": 198
+    },
+    {
+      "epoch": 0.44124168514412415,
+      "grad_norm": 45.2837028503418,
+      "learning_rate": 5.284210526315789e-07,
+      "loss": 1.036,
+      "step": 199
+    },
+    {
+      "epoch": 0.4434589800443459,
+      "grad_norm": 56.693782806396484,
+      "learning_rate": 0.0,
+      "loss": 2.1921,
+      "step": 200
+    },
+    {
+      "epoch": 0.4434589800443459,
+      "eval_loss": 0.8263623714447021,
+      "eval_runtime": 20.2153,
+      "eval_samples_per_second": 9.399,
+      "eval_steps_per_second": 2.374,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.15499809800192e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null