Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:649c4a1a484c0159c613b8a489ed1d96c5992ebf8e4020825625ed9942df0f23
 size 1006723888

 version https://git-lfs.github.com/spec/v1
+oid sha256:23b422ba78bc9e90b0f2614b8e33445b0335f8d20b63784419f73285b4467329
 size 1006723888

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44ef3099ee7dc2d7c7f4dee9d1d12cb55421ed5cf2b20b0662afa2e9af0dfd8c
 size 511971028

 version https://git-lfs.github.com/spec/v1
+oid sha256:069a8e65ea184e2e2902171fc9a9bad798c766ef86f2cc8378c975588dab14e5
 size 511971028

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82d44daf951237b37f6b647168b8b9ee79566f155f3731201afa11fd1088bf26
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:125a26544738dfc9aed7f98590938f6fab5cc39bc12f4110a630a9f03d0a36c8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b26348e70deb5f7d6cee52f5eb084c0e6829440001a3c6d0128b93cd074af8c2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b60d6f1383abda4776549360effee800fe6cfe2c0604503e9e3fbaa79347f790
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.016963951289653778,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.8691588785046729,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 9.361,
       "eval_steps_per_second": 2.358,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.1977505487742566e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.015557925216853619,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 2.4922118380062304,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.361,
       "eval_steps_per_second": 2.358,
       "step": 150
+    },
+    {
+      "epoch": 1.881619937694704,
+      "grad_norm": 0.7227643728256226,
+      "learning_rate": 1.0872630680850196e-05,
+      "loss": 0.0189,
+      "step": 151
+    },
+    {
+      "epoch": 1.8940809968847352,
+      "grad_norm": 1.015174150466919,
+      "learning_rate": 1.0456712550462898e-05,
+      "loss": 0.0153,
+      "step": 152
+    },
+    {
+      "epoch": 1.9065420560747663,
+      "grad_norm": 1.9926519393920898,
+      "learning_rate": 1.0047504309801104e-05,
+      "loss": 0.0367,
+      "step": 153
+    },
+    {
+      "epoch": 1.9190031152647975,
+      "grad_norm": 3.9706075191497803,
+      "learning_rate": 9.645117832311886e-06,
+      "loss": 0.0197,
+      "step": 154
+    },
+    {
+      "epoch": 1.9314641744548287,
+      "grad_norm": 0.3654462993144989,
+      "learning_rate": 9.249663126440394e-06,
+      "loss": 0.0063,
+      "step": 155
+    },
+    {
+      "epoch": 1.9439252336448598,
+      "grad_norm": 1.7151124477386475,
+      "learning_rate": 8.861248305554624e-06,
+      "loss": 0.0192,
+      "step": 156
+    },
+    {
+      "epoch": 1.956386292834891,
+      "grad_norm": 0.3314021825790405,
+      "learning_rate": 8.47997955838829e-06,
+      "loss": 0.006,
+      "step": 157
+    },
+    {
+      "epoch": 1.9688473520249221,
+      "grad_norm": 2.699073553085327,
+      "learning_rate": 8.10596112000994e-06,
+      "loss": 0.0559,
+      "step": 158
+    },
+    {
+      "epoch": 1.9813084112149533,
+      "grad_norm": 1.2592265605926514,
+      "learning_rate": 7.739295243326067e-06,
+      "loss": 0.0251,
+      "step": 159
+    },
+    {
+      "epoch": 1.9937694704049844,
+      "grad_norm": 1.4695305824279785,
+      "learning_rate": 7.380082171126228e-06,
+      "loss": 0.0174,
+      "step": 160
+    },
+    {
+      "epoch": 2.0062305295950154,
+      "grad_norm": 1.8099596500396729,
+      "learning_rate": 7.028420108677635e-06,
+      "loss": 0.0869,
+      "step": 161
+    },
+    {
+      "epoch": 2.0186915887850465,
+      "grad_norm": 2.6261696815490723,
+      "learning_rate": 6.684405196876842e-06,
+      "loss": 0.0908,
+      "step": 162
+    },
+    {
+      "epoch": 2.0311526479750777,
+      "grad_norm": 1.4774502515792847,
+      "learning_rate": 6.3481314859657675e-06,
+      "loss": 0.0575,
+      "step": 163
+    },
+    {
+      "epoch": 2.043613707165109,
+      "grad_norm": 1.358638048171997,
+      "learning_rate": 6.019690909819298e-06,
+      "loss": 0.015,
+      "step": 164
+    },
+    {
+      "epoch": 2.05607476635514,
+      "grad_norm": 0.3894352614879608,
+      "learning_rate": 5.6991732608115e-06,
+      "loss": 0.0091,
+      "step": 165
+    },
+    {
+      "epoch": 2.068535825545171,
+      "grad_norm": 1.1438943147659302,
+      "learning_rate": 5.386666165267256e-06,
+      "loss": 0.0294,
+      "step": 166
+    },
+    {
+      "epoch": 2.0809968847352023,
+      "grad_norm": 1.4572371244430542,
+      "learning_rate": 5.08225505950613e-06,
+      "loss": 0.0394,
+      "step": 167
+    },
+    {
+      "epoch": 2.0934579439252334,
+      "grad_norm": 1.2237462997436523,
+      "learning_rate": 4.786023166484913e-06,
+      "loss": 0.0223,
+      "step": 168
+    },
+    {
+      "epoch": 2.1059190031152646,
+      "grad_norm": 1.6464838981628418,
+      "learning_rate": 4.498051473045291e-06,
+      "loss": 0.0206,
+      "step": 169
+    },
+    {
+      "epoch": 2.1183800623052957,
+      "grad_norm": 0.20726564526557922,
+      "learning_rate": 4.218418707772886e-06,
+      "loss": 0.0052,
+      "step": 170
+    },
+    {
+      "epoch": 2.130841121495327,
+      "grad_norm": 1.9816772937774658,
+      "learning_rate": 3.947201319473587e-06,
+      "loss": 0.022,
+      "step": 171
+    },
+    {
+      "epoch": 2.143302180685358,
+      "grad_norm": 0.6985916495323181,
+      "learning_rate": 3.684473456273278e-06,
+      "loss": 0.0288,
+      "step": 172
+    },
+    {
+      "epoch": 2.155763239875389,
+      "grad_norm": 0.35333579778671265,
+      "learning_rate": 3.4303069453464383e-06,
+      "loss": 0.0098,
+      "step": 173
+    },
+    {
+      "epoch": 2.1682242990654204,
+      "grad_norm": 0.07119999825954437,
+      "learning_rate": 3.184771273279312e-06,
+      "loss": 0.0024,
+      "step": 174
+    },
+    {
+      "epoch": 2.1806853582554515,
+      "grad_norm": 0.2853488326072693,
+      "learning_rate": 2.947933567072987e-06,
+      "loss": 0.0049,
+      "step": 175
+    },
+    {
+      "epoch": 2.1931464174454827,
+      "grad_norm": 0.19059792160987854,
+      "learning_rate": 2.719858575791534e-06,
+      "loss": 0.0039,
+      "step": 176
+    },
+    {
+      "epoch": 2.205607476635514,
+      "grad_norm": 2.3855481147766113,
+      "learning_rate": 2.500608652860256e-06,
+      "loss": 0.0126,
+      "step": 177
+    },
+    {
+      "epoch": 2.218068535825545,
+      "grad_norm": 4.39003324508667,
+      "learning_rate": 2.2902437390188737e-06,
+      "loss": 0.0387,
+      "step": 178
+    },
+    {
+      "epoch": 2.230529595015576,
+      "grad_norm": 1.7412304878234863,
+      "learning_rate": 2.0888213459343587e-06,
+      "loss": 0.039,
+      "step": 179
+    },
+    {
+      "epoch": 2.2429906542056073,
+      "grad_norm": 0.8918175101280212,
+      "learning_rate": 1.8963965404777875e-06,
+      "loss": 0.0294,
+      "step": 180
+    },
+    {
+      "epoch": 2.2554517133956384,
+      "grad_norm": 1.0870163440704346,
+      "learning_rate": 1.7130219296696263e-06,
+      "loss": 0.0408,
+      "step": 181
+    },
+    {
+      "epoch": 2.2679127725856696,
+      "grad_norm": 0.6913807392120361,
+      "learning_rate": 1.5387476462974824e-06,
+      "loss": 0.0103,
+      "step": 182
+    },
+    {
+      "epoch": 2.2803738317757007,
+      "grad_norm": 1.1509912014007568,
+      "learning_rate": 1.3736213352103147e-06,
+      "loss": 0.0167,
+      "step": 183
+    },
+    {
+      "epoch": 2.292834890965732,
+      "grad_norm": 0.6549486517906189,
+      "learning_rate": 1.2176881402928002e-06,
+      "loss": 0.0213,
+      "step": 184
+    },
+    {
+      "epoch": 2.305295950155763,
+      "grad_norm": 2.2542812824249268,
+      "learning_rate": 1.0709906921234367e-06,
+      "loss": 0.0316,
+      "step": 185
+    },
+    {
+      "epoch": 2.317757009345794,
+      "grad_norm": 0.4349377155303955,
+      "learning_rate": 9.33569096319799e-07,
+      "loss": 0.0072,
+      "step": 186
+    },
+    {
+      "epoch": 2.3302180685358254,
+      "grad_norm": 0.8061808347702026,
+      "learning_rate": 8.054609225740255e-07,
+      "loss": 0.0079,
+      "step": 187
+    },
+    {
+      "epoch": 2.3426791277258565,
+      "grad_norm": 1.8132058382034302,
+      "learning_rate": 6.867011943816724e-07,
+      "loss": 0.0056,
+      "step": 188
+    },
+    {
+      "epoch": 2.3551401869158877,
+      "grad_norm": 0.5810498595237732,
+      "learning_rate": 5.77322379466617e-07,
+      "loss": 0.0085,
+      "step": 189
+    },
+    {
+      "epoch": 2.367601246105919,
+      "grad_norm": 4.2651543617248535,
+      "learning_rate": 4.773543809047186e-07,
+      "loss": 0.0356,
+      "step": 190
+    },
+    {
+      "epoch": 2.38006230529595,
+      "grad_norm": 0.7540577054023743,
+      "learning_rate": 3.868245289486027e-07,
+      "loss": 0.0181,
+      "step": 191
+    },
+    {
+      "epoch": 2.392523364485981,
+      "grad_norm": 0.4433201551437378,
+      "learning_rate": 3.0575757355586817e-07,
+      "loss": 0.0062,
+      "step": 192
+    },
+    {
+      "epoch": 2.4049844236760123,
+      "grad_norm": 1.2011690139770508,
+      "learning_rate": 2.3417567762266497e-07,
+      "loss": 0.0169,
+      "step": 193
+    },
+    {
+      "epoch": 2.4174454828660434,
+      "grad_norm": 0.12706521153450012,
+      "learning_rate": 1.7209841092460043e-07,
+      "loss": 0.0023,
+      "step": 194
+    },
+    {
+      "epoch": 2.4299065420560746,
+      "grad_norm": 0.7969943284988403,
+      "learning_rate": 1.1954274476655534e-07,
+      "loss": 0.0054,
+      "step": 195
+    },
+    {
+      "epoch": 2.4423676012461057,
+      "grad_norm": 0.659049391746521,
+      "learning_rate": 7.652304734289127e-08,
+      "loss": 0.0037,
+      "step": 196
+    },
+    {
+      "epoch": 2.454828660436137,
+      "grad_norm": 0.21932357549667358,
+      "learning_rate": 4.30510798093342e-08,
+      "loss": 0.0046,
+      "step": 197
+    },
+    {
+      "epoch": 2.467289719626168,
+      "grad_norm": 0.9605801105499268,
+      "learning_rate": 1.9135993067588284e-08,
+      "loss": 0.0205,
+      "step": 198
+    },
+    {
+      "epoch": 2.479750778816199,
+      "grad_norm": 1.6703097820281982,
+      "learning_rate": 4.784325263584854e-09,
+      "loss": 0.0255,
+      "step": 199
+    },
+    {
+      "epoch": 2.4922118380062304,
+      "grad_norm": 1.4675956964492798,
+      "learning_rate": 0.0,
+      "loss": 0.0041,
+      "step": 200
+    },
+    {
+      "epoch": 2.4922118380062304,
+      "eval_loss": 0.015557925216853619,
+      "eval_runtime": 14.4768,
+      "eval_samples_per_second": 9.325,
+      "eval_steps_per_second": 2.349,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.261889388608225e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null