Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:449fd5f80ed8ca92f5ae98bee82dc195c577e3cf24e3ea644b726b8db62ce9e2
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:9bd251638029ec94f942e0f584c62f7e67d2cf935a9b36f936a9756ecee08a62
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3dfa1f90564f5da3315be93e0c658d7e50370f8d0d411730a33050c1e0116aee
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:14b36595cea87738c6bf80ad3140915da9c384c0b62cde35b2d100bc63eac37b
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa7d5350fe5db1a64f76c69df238a092f9d88227f9938d54714ace5ed97aa2b3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:de2be08e4c8043003031a6e66ac8a5bac349ca1d43af8829a98146988d3c8b41
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc4a786186a574bdc543ff4b4563aab7c5e0b442c74c85899bb42a25553c5d0c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca62d85cf5423834480d3c20680f93b185c8e6574a8a14021d285e0b05c7449f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.8518824577331543,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.8708272859216255,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 13.2,
       "eval_steps_per_second": 3.323,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.262770368118784e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 3.616612195968628,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 1.1611030478955007,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.2,
       "eval_steps_per_second": 3.323,
       "step": 150
+    },
+    {
+      "epoch": 0.8766328011611031,
+      "grad_norm": 3.8558740615844727,
+      "learning_rate": 9.319397726443026e-06,
+      "loss": 3.4249,
+      "step": 151
+    },
+    {
+      "epoch": 0.8824383164005806,
+      "grad_norm": 4.28179407119751,
+      "learning_rate": 8.962896471825342e-06,
+      "loss": 3.2553,
+      "step": 152
+    },
+    {
+      "epoch": 0.888243831640058,
+      "grad_norm": 3.3732564449310303,
+      "learning_rate": 8.61214655125809e-06,
+      "loss": 1.7997,
+      "step": 153
+    },
+    {
+      "epoch": 0.8940493468795355,
+      "grad_norm": 3.3232483863830566,
+      "learning_rate": 8.267243856267331e-06,
+      "loss": 2.1278,
+      "step": 154
+    },
+    {
+      "epoch": 0.8998548621190131,
+      "grad_norm": 2.901930332183838,
+      "learning_rate": 7.928282679806052e-06,
+      "loss": 1.7011,
+      "step": 155
+    },
+    {
+      "epoch": 0.9056603773584906,
+      "grad_norm": 3.175293445587158,
+      "learning_rate": 7.595355690475393e-06,
+      "loss": 1.9759,
+      "step": 156
+    },
+    {
+      "epoch": 0.9114658925979681,
+      "grad_norm": 4.06931734085083,
+      "learning_rate": 7.268553907189964e-06,
+      "loss": 1.4925,
+      "step": 157
+    },
+    {
+      "epoch": 0.9172714078374455,
+      "grad_norm": 4.757447719573975,
+      "learning_rate": 6.947966674294236e-06,
+      "loss": 2.1637,
+      "step": 158
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 3.731959581375122,
+      "learning_rate": 6.6336816371366305e-06,
+      "loss": 1.7434,
+      "step": 159
+    },
+    {
+      "epoch": 0.9288824383164006,
+      "grad_norm": 3.2992019653320312,
+      "learning_rate": 6.325784718108196e-06,
+      "loss": 1.728,
+      "step": 160
+    },
+    {
+      "epoch": 0.9346879535558781,
+      "grad_norm": 3.1267917156219482,
+      "learning_rate": 6.0243600931522595e-06,
+      "loss": 1.4363,
+      "step": 161
+    },
+    {
+      "epoch": 0.9404934687953556,
+      "grad_norm": 3.2282774448394775,
+      "learning_rate": 5.72949016875158e-06,
+      "loss": 1.4056,
+      "step": 162
+    },
+    {
+      "epoch": 0.9462989840348331,
+      "grad_norm": 3.424809455871582,
+      "learning_rate": 5.44125555939923e-06,
+      "loss": 1.5595,
+      "step": 163
+    },
+    {
+      "epoch": 0.9521044992743106,
+      "grad_norm": 2.6489439010620117,
+      "learning_rate": 5.159735065559399e-06,
+      "loss": 1.5116,
+      "step": 164
+    },
+    {
+      "epoch": 0.9579100145137881,
+      "grad_norm": 3.0129847526550293,
+      "learning_rate": 4.885005652124144e-06,
+      "loss": 1.2501,
+      "step": 165
+    },
+    {
+      "epoch": 0.9637155297532656,
+      "grad_norm": 4.441445350646973,
+      "learning_rate": 4.617142427371934e-06,
+      "loss": 1.0584,
+      "step": 166
+    },
+    {
+      "epoch": 0.969521044992743,
+      "grad_norm": 1.9270883798599243,
+      "learning_rate": 4.3562186224338265e-06,
+      "loss": 0.4166,
+      "step": 167
+    },
+    {
+      "epoch": 0.9753265602322206,
+      "grad_norm": 1.6964424848556519,
+      "learning_rate": 4.102305571272783e-06,
+      "loss": 0.3749,
+      "step": 168
+    },
+    {
+      "epoch": 0.9811320754716981,
+      "grad_norm": 2.4464669227600098,
+      "learning_rate": 3.855472691181678e-06,
+      "loss": 0.307,
+      "step": 169
+    },
+    {
+      "epoch": 0.9869375907111756,
+      "grad_norm": 2.4601786136627197,
+      "learning_rate": 3.615787463805331e-06,
+      "loss": 0.4156,
+      "step": 170
+    },
+    {
+      "epoch": 0.9927431059506531,
+      "grad_norm": 1.785112977027893,
+      "learning_rate": 3.383315416691646e-06,
+      "loss": 0.2787,
+      "step": 171
+    },
+    {
+      "epoch": 0.9985486211901307,
+      "grad_norm": 3.0052244663238525,
+      "learning_rate": 3.158120105377096e-06,
+      "loss": 0.8658,
+      "step": 172
+    },
+    {
+      "epoch": 1.004354136429608,
+      "grad_norm": 6.363603115081787,
+      "learning_rate": 2.940263096011233e-06,
+      "loss": 9.4011,
+      "step": 173
+    },
+    {
+      "epoch": 1.0101596516690856,
+      "grad_norm": 5.209592819213867,
+      "learning_rate": 2.729803948525125e-06,
+      "loss": 8.6198,
+      "step": 174
+    },
+    {
+      "epoch": 1.0159651669085632,
+      "grad_norm": 4.575055122375488,
+      "learning_rate": 2.526800200348275e-06,
+      "loss": 6.5637,
+      "step": 175
+    },
+    {
+      "epoch": 1.0217706821480406,
+      "grad_norm": 4.308441638946533,
+      "learning_rate": 2.3313073506784575e-06,
+      "loss": 7.9464,
+      "step": 176
+    },
+    {
+      "epoch": 1.0275761973875182,
+      "grad_norm": 4.7035956382751465,
+      "learning_rate": 2.143378845308791e-06,
+      "loss": 7.9208,
+      "step": 177
+    },
+    {
+      "epoch": 1.0333817126269957,
+      "grad_norm": 4.387391090393066,
+      "learning_rate": 1.9630660620161777e-06,
+      "loss": 6.9943,
+      "step": 178
+    },
+    {
+      "epoch": 1.039187227866473,
+      "grad_norm": 4.29722785949707,
+      "learning_rate": 1.790418296515165e-06,
+      "loss": 6.7689,
+      "step": 179
+    },
+    {
+      "epoch": 1.0449927431059507,
+      "grad_norm": 4.928964138031006,
+      "learning_rate": 1.625482748980961e-06,
+      "loss": 7.2284,
+      "step": 180
+    },
+    {
+      "epoch": 1.050798258345428,
+      "grad_norm": 4.596729755401611,
+      "learning_rate": 1.4683045111453942e-06,
+      "loss": 7.187,
+      "step": 181
+    },
+    {
+      "epoch": 1.0566037735849056,
+      "grad_norm": 3.396052598953247,
+      "learning_rate": 1.3189265539692707e-06,
+      "loss": 6.2928,
+      "step": 182
+    },
+    {
+      "epoch": 1.0624092888243832,
+      "grad_norm": 3.6707615852355957,
+      "learning_rate": 1.1773897158945557e-06,
+      "loss": 6.2344,
+      "step": 183
+    },
+    {
+      "epoch": 1.0682148040638606,
+      "grad_norm": 3.7332701683044434,
+      "learning_rate": 1.0437326916795432e-06,
+      "loss": 6.7565,
+      "step": 184
+    },
+    {
+      "epoch": 1.0740203193033382,
+      "grad_norm": 4.62436580657959,
+      "learning_rate": 9.179920218200888e-07,
+      "loss": 6.8626,
+      "step": 185
+    },
+    {
+      "epoch": 1.0798258345428158,
+      "grad_norm": 4.366294860839844,
+      "learning_rate": 8.002020825598277e-07,
+      "loss": 5.1579,
+      "step": 186
+    },
+    {
+      "epoch": 1.0856313497822931,
+      "grad_norm": 4.452532768249512,
+      "learning_rate": 6.90395076492022e-07,
+      "loss": 6.3907,
+      "step": 187
+    },
+    {
+      "epoch": 1.0914368650217707,
+      "grad_norm": 3.704193353652954,
+      "learning_rate": 5.886010237557194e-07,
+      "loss": 5.5573,
+      "step": 188
+    },
+    {
+      "epoch": 1.097242380261248,
+      "grad_norm": 4.715762615203857,
+      "learning_rate": 4.94847753828529e-07,
+      "loss": 5.9889,
+      "step": 189
+    },
+    {
+      "epoch": 1.1030478955007257,
+      "grad_norm": 3.509040355682373,
+      "learning_rate": 4.091608979183303e-07,
+      "loss": 5.9547,
+      "step": 190
+    },
+    {
+      "epoch": 1.1088534107402033,
+      "grad_norm": 3.5081229209899902,
+      "learning_rate": 3.315638819559452e-07,
+      "loss": 3.8308,
+      "step": 191
+    },
+    {
+      "epoch": 1.1146589259796806,
+      "grad_norm": 3.483219861984253,
+      "learning_rate": 2.6207792019074414e-07,
+      "loss": 3.1807,
+      "step": 192
+    },
+    {
+      "epoch": 1.1204644412191582,
+      "grad_norm": 4.61586332321167,
+      "learning_rate": 2.0072200939085573e-07,
+      "loss": 3.7834,
+      "step": 193
+    },
+    {
+      "epoch": 1.1262699564586356,
+      "grad_norm": 3.6930692195892334,
+      "learning_rate": 1.475129236496575e-07,
+      "loss": 2.9353,
+      "step": 194
+    },
+    {
+      "epoch": 1.1320754716981132,
+      "grad_norm": 4.298684597015381,
+      "learning_rate": 1.0246520979990459e-07,
+      "loss": 2.8793,
+      "step": 195
+    },
+    {
+      "epoch": 1.1378809869375908,
+      "grad_norm": 2.9119861125946045,
+      "learning_rate": 6.559118343676396e-08,
+      "loss": 1.7916,
+      "step": 196
+    },
+    {
+      "epoch": 1.1436865021770681,
+      "grad_norm": 2.8956923484802246,
+      "learning_rate": 3.690092555085789e-08,
+      "loss": 1.4658,
+      "step": 197
+    },
+    {
+      "epoch": 1.1494920174165457,
+      "grad_norm": 2.2967443466186523,
+      "learning_rate": 1.640227977221853e-08,
+      "loss": 1.325,
+      "step": 198
+    },
+    {
+      "epoch": 1.1552975326560233,
+      "grad_norm": 2.335886001586914,
+      "learning_rate": 4.1008502259298755e-09,
+      "loss": 1.7173,
+      "step": 199
+    },
+    {
+      "epoch": 1.1611030478955007,
+      "grad_norm": 3.1506309509277344,
+      "learning_rate": 0.0,
+      "loss": 1.4625,
+      "step": 200
+    },
+    {
+      "epoch": 1.1611030478955007,
+      "eval_loss": 3.616612195968628,
+      "eval_runtime": 21.9647,
+      "eval_samples_per_second": 13.203,
+      "eval_steps_per_second": 3.324,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.016084336504996e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null