Training in progress, step 178, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +200 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45519246c233537a9c96f44a4d465ea85a963480136a541f9cbef72f0a5578ed
 size 34456

 version https://git-lfs.github.com/spec/v1
+oid sha256:4c37fbb107d41cbdc7a6dbce9c43635c4517f7b07dd0a7424f9e01ef2aa75936
 size 34456

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2c66b18bf635fa470d8614c4e6f8862ac1dec0781d06227d3d1f23e799c647a
 size 73222

 version https://git-lfs.github.com/spec/v1
+oid sha256:de5cc0ba260d4fe551566442de17154a3f72130a18586a07393551b63066e192
 size 73222

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3540ef75a83e32e2a105c3eb2c2e4e2abded82469ef011ec799eac944acee9d3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b22af1a69aed4cc95de7744115b1d53ef93fdd980454932bba3d5b2f4fe9efe
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:400e85cd31bb8cbbe64211436e490aa3113b033965821187553a5ca8b55d7f62
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:081d1f5acdcf54d3de0b373d287b553d0238f70fe938b04f3f8bf6870fa7d1d6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 2.5316455696202533,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,202 @@
       "eval_samples_per_second": 219.836,
       "eval_steps_per_second": 54.959,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1308,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 24118847078400.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 11.5,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 3.0042194092827006,
   "eval_steps": 50,
+  "global_step": 178,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 219.836,
       "eval_steps_per_second": 54.959,
       "step": 150
+    },
+    {
+      "epoch": 2.548523206751055,
+      "grad_norm": 0.0002957537362817675,
+      "learning_rate": 6.238828904562316e-06,
+      "loss": 46.0,
+      "step": 151
+    },
+    {
+      "epoch": 2.5654008438818563,
+      "grad_norm": 0.0003422394802328199,
+      "learning_rate": 5.794230324769517e-06,
+      "loss": 46.0,
+      "step": 152
+    },
+    {
+      "epoch": 2.5822784810126582,
+      "grad_norm": 0.0002922121493611485,
+      "learning_rate": 5.365089537819434e-06,
+      "loss": 46.0,
+      "step": 153
+    },
+    {
+      "epoch": 2.59915611814346,
+      "grad_norm": 0.0002365944819757715,
+      "learning_rate": 4.951556604879048e-06,
+      "loss": 46.0,
+      "step": 154
+    },
+    {
+      "epoch": 2.6160337552742616,
+      "grad_norm": 0.00023224019969347864,
+      "learning_rate": 4.5537761293894535e-06,
+      "loss": 46.0,
+      "step": 155
+    },
+    {
+      "epoch": 2.632911392405063,
+      "grad_norm": 0.0006254777545109391,
+      "learning_rate": 4.1718872065011904e-06,
+      "loss": 46.0,
+      "step": 156
+    },
+    {
+      "epoch": 2.649789029535865,
+      "grad_norm": 0.0002028813469223678,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 46.0,
+      "step": 157
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.0004217730602249503,
+      "learning_rate": 3.4563125677897932e-06,
+      "loss": 46.0,
+      "step": 158
+    },
+    {
+      "epoch": 2.6835443037974684,
+      "grad_norm": 0.00040401105070486665,
+      "learning_rate": 3.1228770728000455e-06,
+      "loss": 46.0,
+      "step": 159
+    },
+    {
+      "epoch": 2.70042194092827,
+      "grad_norm": 0.0005044445861130953,
+      "learning_rate": 2.8058334845816213e-06,
+      "loss": 46.0,
+      "step": 160
+    },
+    {
+      "epoch": 2.717299578059072,
+      "grad_norm": 0.00028344604652374983,
+      "learning_rate": 2.5052926663577e-06,
+      "loss": 46.0,
+      "step": 161
+    },
+    {
+      "epoch": 2.7341772151898733,
+      "grad_norm": 0.00025991967413574457,
+      "learning_rate": 2.221359710692961e-06,
+      "loss": 46.0,
+      "step": 162
+    },
+    {
+      "epoch": 2.7510548523206753,
+      "grad_norm": 0.00036968718632124364,
+      "learning_rate": 1.9541339027450256e-06,
+      "loss": 46.0,
+      "step": 163
+    },
+    {
+      "epoch": 2.7679324894514767,
+      "grad_norm": 0.0002495154330972582,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 46.0,
+      "step": 164
+    },
+    {
+      "epoch": 2.7848101265822782,
+      "grad_norm": 0.00027114481781609356,
+      "learning_rate": 1.4701716273304521e-06,
+      "loss": 46.0,
+      "step": 165
+    },
+    {
+      "epoch": 2.80168776371308,
+      "grad_norm": 0.00027105191838927567,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 46.0,
+      "step": 166
+    },
+    {
+      "epoch": 2.818565400843882,
+      "grad_norm": 0.00027963874163106084,
+      "learning_rate": 1.0540827051175818e-06,
+      "loss": 46.0,
+      "step": 167
+    },
+    {
+      "epoch": 2.8354430379746836,
+      "grad_norm": 0.0003596085589379072,
+      "learning_rate": 8.716763383355864e-07,
+      "loss": 46.0,
+      "step": 168
+    },
+    {
+      "epoch": 2.852320675105485,
+      "grad_norm": 0.0002700402110349387,
+      "learning_rate": 7.064490740882057e-07,
+      "loss": 46.0,
+      "step": 169
+    },
+    {
+      "epoch": 2.869198312236287,
+      "grad_norm": 0.0003367900208104402,
+      "learning_rate": 5.584586887435739e-07,
+      "loss": 46.0,
+      "step": 170
+    },
+    {
+      "epoch": 2.8860759493670884,
+      "grad_norm": 0.0005412886966951191,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 46.0,
+      "step": 171
+    },
+    {
+      "epoch": 2.9029535864978904,
+      "grad_norm": 0.0004628577153198421,
+      "learning_rate": 3.143895053378698e-07,
+      "loss": 46.0,
+      "step": 172
+    },
+    {
+      "epoch": 2.919831223628692,
+      "grad_norm": 0.00022919590992387384,
+      "learning_rate": 2.1839605294330933e-07,
+      "loss": 46.0,
+      "step": 173
+    },
+    {
+      "epoch": 2.9367088607594938,
+      "grad_norm": 0.00035376212326809764,
+      "learning_rate": 1.3981014094099353e-07,
+      "loss": 46.0,
+      "step": 174
+    },
+    {
+      "epoch": 2.9535864978902953,
+      "grad_norm": 0.0006031348602846265,
+      "learning_rate": 7.865924910916977e-08,
+      "loss": 46.0,
+      "step": 175
+    },
+    {
+      "epoch": 2.970464135021097,
+      "grad_norm": 0.0004883770016022027,
+      "learning_rate": 3.496476058006959e-08,
+      "loss": 46.0,
+      "step": 176
+    },
+    {
+      "epoch": 2.9873417721518987,
+      "grad_norm": 0.00046055944403633475,
+      "learning_rate": 8.741954362678772e-09,
+      "loss": 46.0,
+      "step": 177
+    },
+    {
+      "epoch": 3.0042194092827006,
+      "grad_norm": 0.0002950435155071318,
+      "learning_rate": 0.0,
+      "loss": 46.0,
+      "step": 178
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 28621031866368.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null