Training in progress, step 400

Files changed (5) hide show

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 30.0,
     "total_flos": 2.006776622592e+17,
-    "train_loss": 4.5579022089640295,
-    "train_runtime": 124.8123,
-    "train_samples_per_second": 3.365,
-    "train_steps_per_second": 0.961
 }

 {
     "epoch": 30.0,
     "total_flos": 2.006776622592e+17,
+    "train_loss": 4.1240334192911785,
+    "train_runtime": 64.9739,
+    "train_samples_per_second": 6.464,
+    "train_steps_per_second": 1.847
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2510e3b4d354c14f689771f130c169e0d1f7915c38b97ef7d3309890f451ac84
 size 166494824

 version https://git-lfs.github.com/spec/v1
+oid sha256:be6161b8d8cf753792787ef1c8ddfeda8a580423b07c5d08a48373b7a8194f94
 size 166494824

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 30.0,
     "total_flos": 2.006776622592e+17,
-    "train_loss": 4.5579022089640295,
-    "train_runtime": 124.8123,
-    "train_samples_per_second": 3.365,
-    "train_steps_per_second": 0.961
 }

 {
     "epoch": 30.0,
     "total_flos": 2.006776622592e+17,
+    "train_loss": 4.1240334192911785,
+    "train_runtime": 64.9739,
+    "train_samples_per_second": 6.464,
+    "train_steps_per_second": 1.847
 }

trainer_state.json CHANGED Viewed

@@ -11,26 +11,26 @@
   "log_history": [
     {
       "epoch": 12.5,
-      "grad_norm": 288.3207702636719,
-      "learning_rate": 3.2500000000000002e-06,
-      "loss": 4.9309,
       "step": 50
     },
     {
       "epoch": 25.0,
-      "grad_norm": 51.23756408691406,
-      "learning_rate": 1.1666666666666668e-06,
-      "loss": 4.4404,
       "step": 100
     },
     {
       "epoch": 30.0,
       "step": 120,
       "total_flos": 2.006776622592e+17,
-      "train_loss": 4.5579022089640295,
-      "train_runtime": 124.8123,
-      "train_samples_per_second": 3.365,
-      "train_steps_per_second": 0.961
     }
   ],
   "logging_steps": 50,

   "log_history": [
     {
       "epoch": 12.5,
+      "grad_norm": 309.19952392578125,
+      "learning_rate": 3.2916666666666668e-06,
+      "loss": 5.09,
       "step": 50
     },
     {
       "epoch": 25.0,
+      "grad_norm": 290.5226745605469,
+      "learning_rate": 1.2083333333333333e-06,
+      "loss": 3.4821,
       "step": 100
     },
     {
       "epoch": 30.0,
       "step": 120,
       "total_flos": 2.006776622592e+17,
+      "train_loss": 4.1240334192911785,
+      "train_runtime": 64.9739,
+      "train_samples_per_second": 6.464,
+      "train_steps_per_second": 1.847
     }
   ],
   "logging_steps": 50,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d7abd29495988426f34f1c7ec4f52b77b3753a4b217dc36f1e379bfb88dc74b
 size 5240

 version https://git-lfs.github.com/spec/v1
+oid sha256:187f95f3094c9306cec0d5ac8d4f752f5d6f27df0406097ee377389ae4d8df22
 size 5240