Training in progress, step 300, checkpoint

Files changed (12) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8efdade0cb5a88e7bb84baf00431f1e949a1d97905099ce8baa5c2fb6557a92
 size 250490408

 version https://git-lfs.github.com/spec/v1
+oid sha256:7063ac72bc493c2ecd4369c55c21d5de8db965f71780aadc11da18aa21d9329f
 size 250490408

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cabfebf08b3f0797e17576ebbca12e321519d675825a837fe8d7631d38564f3
 size 255266042

 version https://git-lfs.github.com/spec/v1
+oid sha256:d69b2147df9ff1ad66fb6ee03e554b608555b9dbe5f3d868491082ce84269e03
 size 255266042

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c12d999b218b9e0c08a198bdc20f4014226e0eb19ed7655fc23d50f8ab343125
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b21d47cc7a4882efa3cfcffab7b0a295e807aa32cf4d422f2ea0fee6fdc78b5
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5a0722e5f1249ecc7dcc488dc3edd8be880d7169ebbbb7726d7ada5116599da
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:08154ff6d5c399867155df805827bb349adaeaef25fdf6ef80c05783808ea6d0
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12c9920376948bfaa9c4b47bf2e57546fd9524fa8fa0172ec3ef098fa0e0fbd0
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:46026d2af163f08f85e957f0222b624a010a302546a564969fabdb6bc7041c10
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1bc026f12d56804c55770111d66d346e5d18da613b4ee3d0022c7359c56b890
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:6df445c62f2ad56caaae41468d1a5cce30880f7650579c3cdba80ec12821e5de
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e9998f72771668f2f13bc0341ee31978ca60bd65552b0d7c421424407a7cc766
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:e76900c3a49367b21bcd4b796fdf1376987ed660638bdf98ccd45d1a87f5c184
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:147905fc29f1cfbaf89a4e002f423388bf823666fbc77ed4847eed5fcfe9547b
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a446f1cd24dab7caff9cadff49a53cd1909d2fb5bba7af3112c5b9d54f4308b
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2859ec2ff93fe92adbbf3106188a48db0ef913cbea0d177678eaf186182cc081
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:507ef075351dc4050f0467f54693ae3336a7708023dc0aefe5fd3e986b17b4a8
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc345d3fd1180d770d05a679357686fca0a650138ef0bb18c8a0e23cf4e525e7
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:de7dde0309a85b36fb0802e6f2c30acf442b986826853119d1478a511a694e77
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0985d0257c892696fb7285dad69becde2a1197c31dbd94a987186cfae751de11
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:74485e67705dc36efbfb69b1e54f842e1ff07894d01bb0e36d6d2526a318b300
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 93.33333333333333,
   "eval_steps": 40,
-  "global_step": 280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -267,6 +267,20 @@
       "eval_samples_per_second": 584.495,
       "eval_steps_per_second": 2.336,
       "step": 280
     }
   ],
   "logging_steps": 10,
@@ -281,12 +295,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.257124823577395e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 100.0,
   "eval_steps": 40,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 584.495,
       "eval_steps_per_second": 2.336,
       "step": 280
+    },
+    {
+      "epoch": 96.66666666666667,
+      "grad_norm": 1.1640625,
+      "learning_rate": 5.478104631726711e-07,
+      "loss": 2.8196,
+      "step": 290
+    },
+    {
+      "epoch": 100.0,
+      "grad_norm": 1.1796875,
+      "learning_rate": 0.0,
+      "loss": 2.8249,
+      "step": 300
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.774796614362726e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null