Training in progress, step 50, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +81 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:939c51cc2293246b64bb1959811fc88d589257a53aa1f711a77986a7a4bb5e68
 size 26008

 version https://git-lfs.github.com/spec/v1
+oid sha256:eea02a42e3c436a9d9ca773b10f834e7afb91e0f46930df7e9f9e294ebf95f54
 size 26008

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d87a9dc6ac0f9d1f605c68b17c6f864ca9882717c9116882dad41bbccf95fab
 size 61926

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9b55d55ff6e805a3e78a7c97d51ebd215c85fb8543012712355f01aa220618f
 size 61926

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:328fe9968df7bf20cce6c816bd538e730876deaee486e22d46258667abf17afc
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:3531fd590b8fd6fcbddb61e6b46ac1b997cf7cef5113053fb7f5015848c30f35
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eba43fdf56448fad0ffc4b3800ab3289204a7137e290f6a13302e4d4c52a37fd
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:29bc15d430c252c1243d6e8d1875dc4c35f8e88790da8d78ff009203f04072e9
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75c3c8ae7ddabeca3cb69703f6b35f00b9125a4c27b56ccba4ed0e4669bb5433
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.6175160811479465,
   "eval_steps": 13,
-  "global_step": 39,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -312,6 +312,83 @@
       "eval_samples_per_second": 121.464,
       "eval_steps_per_second": 30.509,
       "step": 39
     }
   ],
   "logging_steps": 1,
@@ -326,12 +403,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 942458142720.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7916872835230084,
   "eval_steps": 13,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 121.464,
       "eval_steps_per_second": 30.509,
       "step": 39
+    },
+    {
+      "epoch": 0.6333498268184067,
+      "grad_norm": 0.008273452520370483,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 11.9306,
+      "step": 40
+    },
+    {
+      "epoch": 0.6491835724888669,
+      "grad_norm": 0.0072366236709058285,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 11.9315,
+      "step": 41
+    },
+    {
+      "epoch": 0.6650173181593271,
+      "grad_norm": 0.007517676800489426,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 11.9309,
+      "step": 42
+    },
+    {
+      "epoch": 0.6808510638297872,
+      "grad_norm": 0.00758866872638464,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 11.9314,
+      "step": 43
+    },
+    {
+      "epoch": 0.6966848095002474,
+      "grad_norm": 0.006661114748567343,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 11.9313,
+      "step": 44
+    },
+    {
+      "epoch": 0.7125185551707076,
+      "grad_norm": 0.007711367215961218,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 11.9301,
+      "step": 45
+    },
+    {
+      "epoch": 0.7283523008411678,
+      "grad_norm": 0.005763665772974491,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 11.9307,
+      "step": 46
+    },
+    {
+      "epoch": 0.7441860465116279,
+      "grad_norm": 0.006471547763794661,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 11.9307,
+      "step": 47
+    },
+    {
+      "epoch": 0.760019792182088,
+      "grad_norm": 0.00781251024454832,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 11.9312,
+      "step": 48
+    },
+    {
+      "epoch": 0.7758535378525483,
+      "grad_norm": 0.0064762914553284645,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 11.931,
+      "step": 49
+    },
+    {
+      "epoch": 0.7916872835230084,
+      "grad_norm": 0.00732074398547411,
+      "learning_rate": 0.0,
+      "loss": 11.9304,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1208519884800.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null