Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1deccbc329544f9f0528e1ba18f150f3ed95f6a1537268f8f866ec5bea9229f5
 size 755015736

 version https://git-lfs.github.com/spec/v1
+oid sha256:e138cc1f53ae15a03c2f4cb2eb57b58276d1ba401e2b7b6454560314f56ed511
 size 755015736

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08f3cee884dbd5f3cc6783cadb9f5aea3fc884fb0019862ab8acfa8ffdf32aaf
 size 383724628

 version https://git-lfs.github.com/spec/v1
+oid sha256:30a1d2c3d5529a388f7715e7859533c5061d082e65f0ee31dec6afc1b08507f1
 size 383724628

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d0a630e4162cc332992da6288695a7b89d60c04f2bc0c56cc4f6dc034cbdb15
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:336dda81b6dfbc99d433c90346258593946116fe986a20a81841691a558e8981
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8685a85e01d0081c4ee6b3d27083bc45de61653fc346f2b531f3e09e6eff0d83
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f96196bd3544de2c28f6af356470f327df948539b0e3259c46b8a6786b633fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.4248034954071045,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.08534850640113797,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 4.202,
       "eval_steps_per_second": 1.05,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.475630327660544e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.405928611755371,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.11379800853485064,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.202,
       "eval_steps_per_second": 1.05,
       "step": 150
+    },
+    {
+      "epoch": 0.08591749644381223,
+      "grad_norm": 0.7843250036239624,
+      "learning_rate": 2.599578947368421e-05,
+      "loss": 2.7679,
+      "step": 151
+    },
+    {
+      "epoch": 0.08648648648648649,
+      "grad_norm": 0.7846522331237793,
+      "learning_rate": 2.5465263157894738e-05,
+      "loss": 2.8495,
+      "step": 152
+    },
+    {
+      "epoch": 0.08705547652916074,
+      "grad_norm": 0.9606564044952393,
+      "learning_rate": 2.493473684210526e-05,
+      "loss": 3.2779,
+      "step": 153
+    },
+    {
+      "epoch": 0.087624466571835,
+      "grad_norm": 0.9346444010734558,
+      "learning_rate": 2.440421052631579e-05,
+      "loss": 3.0407,
+      "step": 154
+    },
+    {
+      "epoch": 0.08819345661450925,
+      "grad_norm": 0.9511997699737549,
+      "learning_rate": 2.3873684210526313e-05,
+      "loss": 2.9737,
+      "step": 155
+    },
+    {
+      "epoch": 0.0887624466571835,
+      "grad_norm": 0.8434960246086121,
+      "learning_rate": 2.3343157894736843e-05,
+      "loss": 3.0923,
+      "step": 156
+    },
+    {
+      "epoch": 0.08933143669985776,
+      "grad_norm": 0.879172146320343,
+      "learning_rate": 2.281263157894737e-05,
+      "loss": 2.9789,
+      "step": 157
+    },
+    {
+      "epoch": 0.089900426742532,
+      "grad_norm": 0.8454552292823792,
+      "learning_rate": 2.2282105263157892e-05,
+      "loss": 3.0846,
+      "step": 158
+    },
+    {
+      "epoch": 0.09046941678520626,
+      "grad_norm": 0.8216034770011902,
+      "learning_rate": 2.175157894736842e-05,
+      "loss": 3.3177,
+      "step": 159
+    },
+    {
+      "epoch": 0.09103840682788052,
+      "grad_norm": 0.7577628493309021,
+      "learning_rate": 2.1221052631578944e-05,
+      "loss": 3.1528,
+      "step": 160
+    },
+    {
+      "epoch": 0.09160739687055476,
+      "grad_norm": 0.7628657221794128,
+      "learning_rate": 2.0690526315789474e-05,
+      "loss": 2.8293,
+      "step": 161
+    },
+    {
+      "epoch": 0.09217638691322902,
+      "grad_norm": 0.7651684880256653,
+      "learning_rate": 2.016e-05,
+      "loss": 3.1117,
+      "step": 162
+    },
+    {
+      "epoch": 0.09274537695590328,
+      "grad_norm": 0.7415760159492493,
+      "learning_rate": 1.9629473684210526e-05,
+      "loss": 2.7282,
+      "step": 163
+    },
+    {
+      "epoch": 0.09331436699857752,
+      "grad_norm": 0.8385205864906311,
+      "learning_rate": 1.9098947368421053e-05,
+      "loss": 2.8437,
+      "step": 164
+    },
+    {
+      "epoch": 0.09388335704125178,
+      "grad_norm": 0.8416845202445984,
+      "learning_rate": 1.856842105263158e-05,
+      "loss": 2.8249,
+      "step": 165
+    },
+    {
+      "epoch": 0.09445234708392602,
+      "grad_norm": 0.7852922677993774,
+      "learning_rate": 1.8037894736842105e-05,
+      "loss": 2.8207,
+      "step": 166
+    },
+    {
+      "epoch": 0.09502133712660028,
+      "grad_norm": 0.7799615263938904,
+      "learning_rate": 1.750736842105263e-05,
+      "loss": 3.0993,
+      "step": 167
+    },
+    {
+      "epoch": 0.09559032716927454,
+      "grad_norm": 0.8300429582595825,
+      "learning_rate": 1.6976842105263157e-05,
+      "loss": 2.8778,
+      "step": 168
+    },
+    {
+      "epoch": 0.09615931721194879,
+      "grad_norm": 0.8431521654129028,
+      "learning_rate": 1.6446315789473684e-05,
+      "loss": 3.0726,
+      "step": 169
+    },
+    {
+      "epoch": 0.09672830725462304,
+      "grad_norm": 0.9213804602622986,
+      "learning_rate": 1.591578947368421e-05,
+      "loss": 2.9397,
+      "step": 170
+    },
+    {
+      "epoch": 0.0972972972972973,
+      "grad_norm": 0.8189807534217834,
+      "learning_rate": 1.5385263157894736e-05,
+      "loss": 3.004,
+      "step": 171
+    },
+    {
+      "epoch": 0.09786628733997155,
+      "grad_norm": 0.8088762760162354,
+      "learning_rate": 1.485473684210526e-05,
+      "loss": 2.9671,
+      "step": 172
+    },
+    {
+      "epoch": 0.0984352773826458,
+      "grad_norm": 0.7970319986343384,
+      "learning_rate": 1.4324210526315789e-05,
+      "loss": 3.0041,
+      "step": 173
+    },
+    {
+      "epoch": 0.09900426742532005,
+      "grad_norm": 0.8069632649421692,
+      "learning_rate": 1.3793684210526316e-05,
+      "loss": 2.9655,
+      "step": 174
+    },
+    {
+      "epoch": 0.09957325746799431,
+      "grad_norm": 0.8151662349700928,
+      "learning_rate": 1.3263157894736841e-05,
+      "loss": 2.8613,
+      "step": 175
+    },
+    {
+      "epoch": 0.10014224751066857,
+      "grad_norm": 0.9155776500701904,
+      "learning_rate": 1.2732631578947369e-05,
+      "loss": 2.8786,
+      "step": 176
+    },
+    {
+      "epoch": 0.10071123755334281,
+      "grad_norm": 0.7725786566734314,
+      "learning_rate": 1.2202105263157895e-05,
+      "loss": 2.7416,
+      "step": 177
+    },
+    {
+      "epoch": 0.10128022759601707,
+      "grad_norm": 0.8339952230453491,
+      "learning_rate": 1.1671578947368421e-05,
+      "loss": 2.7873,
+      "step": 178
+    },
+    {
+      "epoch": 0.10184921763869133,
+      "grad_norm": 0.8299411535263062,
+      "learning_rate": 1.1141052631578946e-05,
+      "loss": 2.7875,
+      "step": 179
+    },
+    {
+      "epoch": 0.10241820768136557,
+      "grad_norm": 0.9416133761405945,
+      "learning_rate": 1.0610526315789472e-05,
+      "loss": 2.9303,
+      "step": 180
+    },
+    {
+      "epoch": 0.10298719772403983,
+      "grad_norm": 0.8860821723937988,
+      "learning_rate": 1.008e-05,
+      "loss": 2.7522,
+      "step": 181
+    },
+    {
+      "epoch": 0.10355618776671409,
+      "grad_norm": 0.8492652773857117,
+      "learning_rate": 9.549473684210526e-06,
+      "loss": 2.7127,
+      "step": 182
+    },
+    {
+      "epoch": 0.10412517780938833,
+      "grad_norm": 0.8111841678619385,
+      "learning_rate": 9.018947368421052e-06,
+      "loss": 2.8087,
+      "step": 183
+    },
+    {
+      "epoch": 0.10469416785206259,
+      "grad_norm": 0.8741388916969299,
+      "learning_rate": 8.488421052631579e-06,
+      "loss": 2.9206,
+      "step": 184
+    },
+    {
+      "epoch": 0.10526315789473684,
+      "grad_norm": 1.0461604595184326,
+      "learning_rate": 7.957894736842105e-06,
+      "loss": 3.0844,
+      "step": 185
+    },
+    {
+      "epoch": 0.1058321479374111,
+      "grad_norm": 0.9712023138999939,
+      "learning_rate": 7.42736842105263e-06,
+      "loss": 3.0991,
+      "step": 186
+    },
+    {
+      "epoch": 0.10640113798008535,
+      "grad_norm": 0.9134138226509094,
+      "learning_rate": 6.896842105263158e-06,
+      "loss": 2.8668,
+      "step": 187
+    },
+    {
+      "epoch": 0.1069701280227596,
+      "grad_norm": 0.9476480484008789,
+      "learning_rate": 6.3663157894736845e-06,
+      "loss": 3.0572,
+      "step": 188
+    },
+    {
+      "epoch": 0.10753911806543386,
+      "grad_norm": 0.9557372331619263,
+      "learning_rate": 5.835789473684211e-06,
+      "loss": 3.0734,
+      "step": 189
+    },
+    {
+      "epoch": 0.10810810810810811,
+      "grad_norm": 1.0418270826339722,
+      "learning_rate": 5.305263157894736e-06,
+      "loss": 3.2696,
+      "step": 190
+    },
+    {
+      "epoch": 0.10867709815078236,
+      "grad_norm": 1.0057651996612549,
+      "learning_rate": 4.774736842105263e-06,
+      "loss": 3.0763,
+      "step": 191
+    },
+    {
+      "epoch": 0.10924608819345662,
+      "grad_norm": 0.937343180179596,
+      "learning_rate": 4.244210526315789e-06,
+      "loss": 2.7003,
+      "step": 192
+    },
+    {
+      "epoch": 0.10981507823613086,
+      "grad_norm": 0.965437114238739,
+      "learning_rate": 3.713684210526315e-06,
+      "loss": 2.4666,
+      "step": 193
+    },
+    {
+      "epoch": 0.11038406827880512,
+      "grad_norm": 0.9241202473640442,
+      "learning_rate": 3.1831578947368422e-06,
+      "loss": 1.7998,
+      "step": 194
+    },
+    {
+      "epoch": 0.11095305832147938,
+      "grad_norm": 0.9487320184707642,
+      "learning_rate": 2.652631578947368e-06,
+      "loss": 1.6532,
+      "step": 195
+    },
+    {
+      "epoch": 0.11152204836415362,
+      "grad_norm": 1.029417872428894,
+      "learning_rate": 2.1221052631578947e-06,
+      "loss": 1.8063,
+      "step": 196
+    },
+    {
+      "epoch": 0.11209103840682788,
+      "grad_norm": 1.0649429559707642,
+      "learning_rate": 1.5915789473684211e-06,
+      "loss": 1.7311,
+      "step": 197
+    },
+    {
+      "epoch": 0.11266002844950214,
+      "grad_norm": 1.1187015771865845,
+      "learning_rate": 1.0610526315789473e-06,
+      "loss": 1.806,
+      "step": 198
+    },
+    {
+      "epoch": 0.11322901849217638,
+      "grad_norm": 1.6845424175262451,
+      "learning_rate": 5.305263157894737e-07,
+      "loss": 2.2611,
+      "step": 199
+    },
+    {
+      "epoch": 0.11379800853485064,
+      "grad_norm": 2.280428886413574,
+      "learning_rate": 0.0,
+      "loss": 2.5917,
+      "step": 200
+    },
+    {
+      "epoch": 0.11379800853485064,
+      "eval_loss": 1.405928611755371,
+      "eval_runtime": 176.2442,
+      "eval_samples_per_second": 4.199,
+      "eval_steps_per_second": 1.05,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.967507103547392e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null