Training in progress, step 8500, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4
last-checkpoint/training_args.bin +1 -1

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f672218c5e4e6a2121de04b65360eb70212979671319a6b7ccc699db76402d01
 size 1783055976

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac1a75e495e37659f935a94a153e5ae964ac453c26fb6a9e6d16dca433447a46
 size 1783055976

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2097d5365d132bf9645baefe54a0be746abe632499a074cdbc93777d0e5c34c3
 size 3566173562

 version https://git-lfs.github.com/spec/v1
+oid sha256:c30fdbb96dddd647ac7cab10b09e0ee2785b645bae4fc61cc48fe5171deae209
 size 3566173562

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f83fe17529e572dff2734bb21512b28dd7cf5d20ef0e84688f5068ffbf24e765
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9213080fe2b45399b87036ca9ff9164533abe6b368e5c828136ee184486749d4
 size 14244

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97bba99094cb6ba509984c3fb752cc4387fde3be7cca9c123af30577d2dd911a
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:68f6c298533d8c912a29bc1b5e945c92e9b2f851b00f21834ca034f7df9e34ae
 size 988

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d45cef01a0c03a5fc76309b06f41731c4ef0f05769be4b933a78626a0c047135
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3965fdcf65adda4a516b1c5cd32d581e539d552e45f86bb9fe1a08df9bbf9fad
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01789334893036915,
   "eval_steps": 500,
-  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1256,10 +1256,88 @@
       "eval_samples_per_second": 6.18,
       "eval_steps_per_second": 6.18,
       "step": 8000
     }
   ],
   "logging_steps": 50,
-  "max_steps": 1341282,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
@@ -1275,7 +1353,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.7532976996352e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.07604673295406889,
   "eval_steps": 500,
+  "global_step": 8500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 6.18,
       "eval_steps_per_second": 6.18,
       "step": 8000
+    },
+    {
+      "epoch": 0.07202072944473584,
+      "grad_norm": 0.6482174396514893,
+      "learning_rate": 4.0004970178926446e-05,
+      "loss": 0.3199,
+      "step": 8050
+    },
+    {
+      "epoch": 0.07246806316799506,
+      "grad_norm": 0.560840368270874,
+      "learning_rate": 4.025347912524851e-05,
+      "loss": 0.3203,
+      "step": 8100
+    },
+    {
+      "epoch": 0.07291539689125429,
+      "grad_norm": 0.47107866406440735,
+      "learning_rate": 4.050198807157058e-05,
+      "loss": 0.3277,
+      "step": 8150
+    },
+    {
+      "epoch": 0.07336273061451352,
+      "grad_norm": 0.6409516334533691,
+      "learning_rate": 4.075049701789265e-05,
+      "loss": 0.3141,
+      "step": 8200
+    },
+    {
+      "epoch": 0.07381006433777275,
+      "grad_norm": 0.5527054667472839,
+      "learning_rate": 4.0999005964214716e-05,
+      "loss": 0.3121,
+      "step": 8250
+    },
+    {
+      "epoch": 0.07425739806103197,
+      "grad_norm": 0.5012880563735962,
+      "learning_rate": 4.124751491053678e-05,
+      "loss": 0.3015,
+      "step": 8300
+    },
+    {
+      "epoch": 0.07470473178429121,
+      "grad_norm": 0.44512906670570374,
+      "learning_rate": 4.149602385685885e-05,
+      "loss": 0.3145,
+      "step": 8350
+    },
+    {
+      "epoch": 0.07515206550755044,
+      "grad_norm": 0.5861555337905884,
+      "learning_rate": 4.174453280318092e-05,
+      "loss": 0.3077,
+      "step": 8400
+    },
+    {
+      "epoch": 0.07559939923080966,
+      "grad_norm": 0.5449799299240112,
+      "learning_rate": 4.199304174950298e-05,
+      "loss": 0.3056,
+      "step": 8450
+    },
+    {
+      "epoch": 0.07604673295406889,
+      "grad_norm": 0.6001898646354675,
+      "learning_rate": 4.224155069582505e-05,
+      "loss": 0.3066,
+      "step": 8500
+    },
+    {
+      "epoch": 0.07604673295406889,
+      "eval_loss": 0.3047943115234375,
+      "eval_runtime": 226.5226,
+      "eval_samples_per_second": 39.877,
+      "eval_steps_per_second": 4.988,
+      "step": 8500
     }
   ],
   "logging_steps": 50,
+  "max_steps": 335322,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 1.0941622124544e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e104d5d3fed57e888a9b365bb5a58dcb175d2c55ee60c594628217a2bfea4cd
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:405d1c439695043016bcffc14d4eaca75fd1542cf769a56c446e9d029109e3c1
 size 5432