Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc8341e80ac0fbf04fba66a8809799ed495ede75dd96e70cd911ecf3ccc335db
 size 478211024

 version https://git-lfs.github.com/spec/v1
+oid sha256:9caa07ffb6a06e984c29553183aed5ef666d5184ad017ad6b5789f9acd46a423
 size 478211024

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bddf571e0cbbf6e8dd7f38cf11cad62e9a17711bb37d860a4709c7d88191f42e
 size 243337876

 version https://git-lfs.github.com/spec/v1
+oid sha256:24d62beb0bb3c053de89a330aad32fd2bcf6fbed049df6e3c34a87367f4adeb5
 size 243337876

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f559f96aa4718e0775b1ba1413fbbe63a38407dc074a3ed16103f8050cf87636
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8cc0b0b7bc1fe634304bc351523fef43bdbbf05bd3cbde470e2ea5592dac098
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef91cf09f7b6a58a39bbfe1ef78ac2fa91c0c15ca1705097a187d272d0433d8d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c47edf82128c3f034f21204d4b9ce5c76cd3269748ce31061b148a0a389d049d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.41922199726104736,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.003850843976638213,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 13.843,
       "eval_steps_per_second": 3.461,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.842158118522061e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.4131671190261841,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.005134458635517617,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.843,
       "eval_steps_per_second": 3.461,
       "step": 150
+    },
+    {
+      "epoch": 0.003876516269815801,
+      "grad_norm": 0.7326381802558899,
+      "learning_rate": 2.5944210526315793e-05,
+      "loss": 1.3282,
+      "step": 151
+    },
+    {
+      "epoch": 0.0039021885629933893,
+      "grad_norm": 0.931328535079956,
+      "learning_rate": 2.5414736842105266e-05,
+      "loss": 1.2657,
+      "step": 152
+    },
+    {
+      "epoch": 0.003927860856170978,
+      "grad_norm": 0.7979090213775635,
+      "learning_rate": 2.4885263157894737e-05,
+      "loss": 1.0431,
+      "step": 153
+    },
+    {
+      "epoch": 0.003953533149348565,
+      "grad_norm": 0.8406013250350952,
+      "learning_rate": 2.4355789473684214e-05,
+      "loss": 1.5444,
+      "step": 154
+    },
+    {
+      "epoch": 0.003979205442526153,
+      "grad_norm": 1.172977328300476,
+      "learning_rate": 2.3826315789473684e-05,
+      "loss": 2.2357,
+      "step": 155
+    },
+    {
+      "epoch": 0.004004877735703741,
+      "grad_norm": 1.081085443496704,
+      "learning_rate": 2.3296842105263158e-05,
+      "loss": 1.9574,
+      "step": 156
+    },
+    {
+      "epoch": 0.0040305500288813296,
+      "grad_norm": 1.213394284248352,
+      "learning_rate": 2.2767368421052635e-05,
+      "loss": 1.7966,
+      "step": 157
+    },
+    {
+      "epoch": 0.004056222322058918,
+      "grad_norm": 1.3481943607330322,
+      "learning_rate": 2.2237894736842105e-05,
+      "loss": 2.1099,
+      "step": 158
+    },
+    {
+      "epoch": 0.004081894615236506,
+      "grad_norm": 1.1967196464538574,
+      "learning_rate": 2.170842105263158e-05,
+      "loss": 1.7427,
+      "step": 159
+    },
+    {
+      "epoch": 0.004107566908414094,
+      "grad_norm": 1.6480809450149536,
+      "learning_rate": 2.1178947368421053e-05,
+      "loss": 2.2222,
+      "step": 160
+    },
+    {
+      "epoch": 0.004133239201591682,
+      "grad_norm": 1.4550552368164062,
+      "learning_rate": 2.0649473684210527e-05,
+      "loss": 1.5513,
+      "step": 161
+    },
+    {
+      "epoch": 0.00415891149476927,
+      "grad_norm": 1.5787893533706665,
+      "learning_rate": 2.0120000000000004e-05,
+      "loss": 2.0812,
+      "step": 162
+    },
+    {
+      "epoch": 0.004184583787946858,
+      "grad_norm": 1.6700499057769775,
+      "learning_rate": 1.9590526315789474e-05,
+      "loss": 2.0889,
+      "step": 163
+    },
+    {
+      "epoch": 0.0042102560811244465,
+      "grad_norm": 1.2719978094100952,
+      "learning_rate": 1.9061052631578948e-05,
+      "loss": 1.6051,
+      "step": 164
+    },
+    {
+      "epoch": 0.004235928374302035,
+      "grad_norm": 2.337200164794922,
+      "learning_rate": 1.8531578947368422e-05,
+      "loss": 2.2032,
+      "step": 165
+    },
+    {
+      "epoch": 0.004261600667479623,
+      "grad_norm": 2.0356595516204834,
+      "learning_rate": 1.8002105263157896e-05,
+      "loss": 2.3998,
+      "step": 166
+    },
+    {
+      "epoch": 0.004287272960657211,
+      "grad_norm": 1.9695961475372314,
+      "learning_rate": 1.747263157894737e-05,
+      "loss": 2.3592,
+      "step": 167
+    },
+    {
+      "epoch": 0.004312945253834799,
+      "grad_norm": 2.0183303356170654,
+      "learning_rate": 1.6943157894736843e-05,
+      "loss": 2.1365,
+      "step": 168
+    },
+    {
+      "epoch": 0.004338617547012387,
+      "grad_norm": 2.894932270050049,
+      "learning_rate": 1.6413684210526317e-05,
+      "loss": 2.4704,
+      "step": 169
+    },
+    {
+      "epoch": 0.004364289840189975,
+      "grad_norm": 1.7332582473754883,
+      "learning_rate": 1.588421052631579e-05,
+      "loss": 0.8739,
+      "step": 170
+    },
+    {
+      "epoch": 0.004389962133367563,
+      "grad_norm": 0.0018881710711866617,
+      "learning_rate": 1.5354736842105264e-05,
+      "loss": 0.0001,
+      "step": 171
+    },
+    {
+      "epoch": 0.0044156344265451515,
+      "grad_norm": 0.0019908491522073746,
+      "learning_rate": 1.4825263157894736e-05,
+      "loss": 0.0001,
+      "step": 172
+    },
+    {
+      "epoch": 0.00444130671972274,
+      "grad_norm": 0.0019480012124404311,
+      "learning_rate": 1.4295789473684212e-05,
+      "loss": 0.0001,
+      "step": 173
+    },
+    {
+      "epoch": 0.004466979012900327,
+      "grad_norm": 0.0019311723299324512,
+      "learning_rate": 1.3766315789473686e-05,
+      "loss": 0.0001,
+      "step": 174
+    },
+    {
+      "epoch": 0.004492651306077915,
+      "grad_norm": 0.0019914451986551285,
+      "learning_rate": 1.3236842105263158e-05,
+      "loss": 0.0001,
+      "step": 175
+    },
+    {
+      "epoch": 0.004518323599255503,
+      "grad_norm": 0.001993614248931408,
+      "learning_rate": 1.2707368421052633e-05,
+      "loss": 0.0001,
+      "step": 176
+    },
+    {
+      "epoch": 0.004543995892433091,
+      "grad_norm": 0.0019910179544240236,
+      "learning_rate": 1.2177894736842107e-05,
+      "loss": 0.0001,
+      "step": 177
+    },
+    {
+      "epoch": 0.004569668185610679,
+      "grad_norm": 0.002013832563534379,
+      "learning_rate": 1.1648421052631579e-05,
+      "loss": 0.0001,
+      "step": 178
+    },
+    {
+      "epoch": 0.0045953404787882676,
+      "grad_norm": 0.0020234170369803905,
+      "learning_rate": 1.1118947368421053e-05,
+      "loss": 0.0001,
+      "step": 179
+    },
+    {
+      "epoch": 0.004621012771965856,
+      "grad_norm": 0.0019844514317810535,
+      "learning_rate": 1.0589473684210526e-05,
+      "loss": 0.0001,
+      "step": 180
+    },
+    {
+      "epoch": 0.004646685065143444,
+      "grad_norm": 0.00206298241391778,
+      "learning_rate": 1.0060000000000002e-05,
+      "loss": 0.0001,
+      "step": 181
+    },
+    {
+      "epoch": 0.004672357358321032,
+      "grad_norm": 0.0020082283299416304,
+      "learning_rate": 9.530526315789474e-06,
+      "loss": 0.0001,
+      "step": 182
+    },
+    {
+      "epoch": 0.00469802965149862,
+      "grad_norm": 0.0020704076159745455,
+      "learning_rate": 9.001052631578948e-06,
+      "loss": 0.0001,
+      "step": 183
+    },
+    {
+      "epoch": 0.004723701944676208,
+      "grad_norm": 0.002045375294983387,
+      "learning_rate": 8.471578947368422e-06,
+      "loss": 0.0001,
+      "step": 184
+    },
+    {
+      "epoch": 0.004749374237853796,
+      "grad_norm": 0.0021098172292113304,
+      "learning_rate": 7.942105263157895e-06,
+      "loss": 0.0001,
+      "step": 185
+    },
+    {
+      "epoch": 0.0047750465310313845,
+      "grad_norm": 0.0021194566506892443,
+      "learning_rate": 7.412631578947368e-06,
+      "loss": 0.0001,
+      "step": 186
+    },
+    {
+      "epoch": 0.004800718824208973,
+      "grad_norm": 0.0020953835919499397,
+      "learning_rate": 6.883157894736843e-06,
+      "loss": 0.0001,
+      "step": 187
+    },
+    {
+      "epoch": 0.004826391117386561,
+      "grad_norm": 0.0021119611337780952,
+      "learning_rate": 6.3536842105263166e-06,
+      "loss": 0.0001,
+      "step": 188
+    },
+    {
+      "epoch": 0.004852063410564149,
+      "grad_norm": 0.0021464722231030464,
+      "learning_rate": 5.8242105263157895e-06,
+      "loss": 0.0001,
+      "step": 189
+    },
+    {
+      "epoch": 0.004877735703741737,
+      "grad_norm": 0.0022363984026014805,
+      "learning_rate": 5.294736842105263e-06,
+      "loss": 0.0001,
+      "step": 190
+    },
+    {
+      "epoch": 0.004903407996919325,
+      "grad_norm": 0.002060087164863944,
+      "learning_rate": 4.765263157894737e-06,
+      "loss": 0.0001,
+      "step": 191
+    },
+    {
+      "epoch": 0.004929080290096913,
+      "grad_norm": 0.0019431081600487232,
+      "learning_rate": 4.235789473684211e-06,
+      "loss": 0.0001,
+      "step": 192
+    },
+    {
+      "epoch": 0.004954752583274501,
+      "grad_norm": 0.0021071808878332376,
+      "learning_rate": 3.706315789473684e-06,
+      "loss": 0.0001,
+      "step": 193
+    },
+    {
+      "epoch": 0.0049804248764520895,
+      "grad_norm": 0.0020984155125916004,
+      "learning_rate": 3.1768421052631583e-06,
+      "loss": 0.0001,
+      "step": 194
+    },
+    {
+      "epoch": 0.005006097169629677,
+      "grad_norm": 0.002161442069336772,
+      "learning_rate": 2.6473684210526316e-06,
+      "loss": 0.0001,
+      "step": 195
+    },
+    {
+      "epoch": 0.005031769462807265,
+      "grad_norm": 0.002335567260161042,
+      "learning_rate": 2.1178947368421054e-06,
+      "loss": 0.0001,
+      "step": 196
+    },
+    {
+      "epoch": 0.005057441755984853,
+      "grad_norm": 1.0611015558242798,
+      "learning_rate": 1.5884210526315791e-06,
+      "loss": 0.2117,
+      "step": 197
+    },
+    {
+      "epoch": 0.005083114049162441,
+      "grad_norm": 0.39899060130119324,
+      "learning_rate": 1.0589473684210527e-06,
+      "loss": 0.089,
+      "step": 198
+    },
+    {
+      "epoch": 0.005108786342340029,
+      "grad_norm": 3.193357467651367,
+      "learning_rate": 5.294736842105263e-07,
+      "loss": 1.4026,
+      "step": 199
+    },
+    {
+      "epoch": 0.005134458635517617,
+      "grad_norm": 3.7854483127593994,
+      "learning_rate": 0.0,
+      "loss": 1.5641,
+      "step": 200
+    },
+    {
+      "epoch": 0.005134458635517617,
+      "eval_loss": 0.4131671190261841,
+      "eval_runtime": 1189.4105,
+      "eval_samples_per_second": 13.79,
+      "eval_steps_per_second": 3.448,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.795839081879962e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null