Upload checkpoint 8702

Browse files

Files changed (5) hide show

model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
optimizer.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +144 -4

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3f2b3d40d5d70cd69a4100d88be206276bdab19bdf6613332b1a3e205e2a0d3
 size 4957560304

 version https://git-lfs.github.com/spec/v1
+oid sha256:89b6ed8f166125c4e0ccb4438463feac8c4befdcfa3b5fc23df50b931dd37964
 size 4957560304

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:47bda3ba51a5d0d14e4e6d1e5e1a1a499e86cac640e7a65e02a17da73c72f9bc
 size 3989163248

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfeea3b480b0ac3a7be7edb3e3d45b0b6eed0bc230ddbdd298ef3463ce89ffd9
 size 3989163248

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c078439a2268de07efa415fd281c455eb2d1821139fd97a7039a0e011516f93
 size 17893865224

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc5cc673934837b111c47f04c42854cbbf4155db979be7dde3c8474b55635ed6
 size 17893865224

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:618163f6be35a05ac5e460626b2bd512fd3db05ca7c47320572b7f41f91978fb
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb0516760d88d4baa388b6e998401e0078c7ad3407932309df513a21a23fcf7a
 size 1064

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9928181557023844,
   "eval_steps": 500,
-  "global_step": 8640,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -20167,6 +20167,146 @@
       "learning_rate": 2.5107040290095474e-08,
       "loss": 1.6549,
       "step": 8640
     }
   ],
   "logging_steps": 3,
@@ -20181,12 +20321,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.1423252906849075e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9999425452456191,
   "eval_steps": 500,
+  "global_step": 8702,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.5107040290095474e-08,
       "loss": 1.6549,
       "step": 8640
+    },
+    {
+      "epoch": 0.9931628842286699,
+      "grad_norm": 0.38671875,
+      "learning_rate": 2.2736199954154213e-08,
+      "loss": 1.6457,
+      "step": 8643
+    },
+    {
+      "epoch": 0.9935076127549555,
+      "grad_norm": 0.3828125,
+      "learning_rate": 2.0482904581231588e-08,
+      "loss": 1.6282,
+      "step": 8646
+    },
+    {
+      "epoch": 0.993852341281241,
+      "grad_norm": 0.376953125,
+      "learning_rate": 1.834715682056398e-08,
+      "loss": 1.654,
+      "step": 8649
+    },
+    {
+      "epoch": 0.9941970698075265,
+      "grad_norm": 0.388671875,
+      "learning_rate": 1.632895918319832e-08,
+      "loss": 1.6449,
+      "step": 8652
+    },
+    {
+      "epoch": 0.9945417983338122,
+      "grad_norm": 0.376953125,
+      "learning_rate": 1.4428314041958767e-08,
+      "loss": 1.6957,
+      "step": 8655
+    },
+    {
+      "epoch": 0.9948865268600977,
+      "grad_norm": 0.380859375,
+      "learning_rate": 1.2645223631457815e-08,
+      "loss": 1.6494,
+      "step": 8658
+    },
+    {
+      "epoch": 0.9952312553863832,
+      "grad_norm": 0.39453125,
+      "learning_rate": 1.0979690048107394e-08,
+      "loss": 1.6152,
+      "step": 8661
+    },
+    {
+      "epoch": 0.9955759839126688,
+      "grad_norm": 0.37109375,
+      "learning_rate": 9.431715250118878e-09,
+      "loss": 1.5803,
+      "step": 8664
+    },
+    {
+      "epoch": 0.9959207124389543,
+      "grad_norm": 0.400390625,
+      "learning_rate": 8.001301057447563e-09,
+      "loss": 1.6867,
+      "step": 8667
+    },
+    {
+      "epoch": 0.9962654409652398,
+      "grad_norm": 0.392578125,
+      "learning_rate": 6.688449151881493e-09,
+      "loss": 1.6015,
+      "step": 8670
+    },
+    {
+      "epoch": 0.9966101694915255,
+      "grad_norm": 0.390625,
+      "learning_rate": 5.493161076941533e-09,
+      "loss": 1.5957,
+      "step": 8673
+    },
+    {
+      "epoch": 0.996954898017811,
+      "grad_norm": 0.373046875,
+      "learning_rate": 4.415438237959091e-09,
+      "loss": 1.6479,
+      "step": 8676
+    },
+    {
+      "epoch": 0.9972996265440965,
+      "grad_norm": 0.37890625,
+      "learning_rate": 3.455281902031704e-09,
+      "loss": 1.6768,
+      "step": 8679
+    },
+    {
+      "epoch": 0.9976443550703821,
+      "grad_norm": 0.3671875,
+      "learning_rate": 2.612693198023042e-09,
+      "loss": 1.6631,
+      "step": 8682
+    },
+    {
+      "epoch": 0.9979890835966676,
+      "grad_norm": 0.380859375,
+      "learning_rate": 1.8876731165962115e-09,
+      "loss": 1.6653,
+      "step": 8685
+    },
+    {
+      "epoch": 0.9983338121229531,
+      "grad_norm": 0.369140625,
+      "learning_rate": 1.2802225101471444e-09,
+      "loss": 1.5957,
+      "step": 8688
+    },
+    {
+      "epoch": 0.9986785406492388,
+      "grad_norm": 0.392578125,
+      "learning_rate": 7.903420928823124e-10,
+      "loss": 1.6107,
+      "step": 8691
+    },
+    {
+      "epoch": 0.9990232691755243,
+      "grad_norm": 0.384765625,
+      "learning_rate": 4.1803244075211414e-10,
+      "loss": 1.6701,
+      "step": 8694
+    },
+    {
+      "epoch": 0.9993679977018098,
+      "grad_norm": 0.384765625,
+      "learning_rate": 1.6329399149528356e-10,
+      "loss": 1.6652,
+      "step": 8697
+    },
+    {
+      "epoch": 0.9997127262280954,
+      "grad_norm": 0.384765625,
+      "learning_rate": 2.6127044616686137e-11,
+      "loss": 1.7509,
+      "step": 8700
     }
   ],
   "logging_steps": 3,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.1626194748539142e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null