Training in progress, step 38, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +144 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f3409b0f5efa7f40450782feb6a123591f249c238e1879c652d6d40f3459319
 size 50899792

 version https://git-lfs.github.com/spec/v1
+oid sha256:40978b6045d435860bdc653f2c1ad8723d28aa9333e29555d66768df70c763bb
 size 50899792

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3c18fce2e1841a394b8e913293aaac27b29b40846fa539e2acc0c900fb39e1c
 size 26231300

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6d6739e3b80f0c91dc11e94b1399835f20bb5b0a93df5bd3138f9f3a59d2d13
 size 26231300

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f918e894c61d79473825b52a272cf41e854c27a6d9183f7c13da913c2b3b6227
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d606d31d447120bc1b4de5890ffaff6e62d8521d8976078b55323f24cb5690d3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbe7944e3134660b6c8767b8065ebe88cdbbd95d926d0c63b94c69623d39c56b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c4f0183aec085119f27cd46c60ab3f231930ae66c7ca01d0adff96b44d5e0e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.25249169435215946,
   "eval_steps": 19,
-  "global_step": 19,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -156,6 +156,147 @@
       "eval_samples_per_second": 33.751,
       "eval_steps_per_second": 16.875,
       "step": 19
     }
   ],
   "logging_steps": 1,
@@ -175,7 +316,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1599094896721920.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5049833887043189,
   "eval_steps": 19,
+  "global_step": 38,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 33.751,
       "eval_steps_per_second": 16.875,
       "step": 19
+    },
+    {
+      "epoch": 0.26578073089701,
+      "grad_norm": 0.659614086151123,
+      "learning_rate": 0.00018888354486549237,
+      "loss": 0.7246,
+      "step": 20
+    },
+    {
+      "epoch": 0.27906976744186046,
+      "grad_norm": 0.4992068409919739,
+      "learning_rate": 0.00018660254037844388,
+      "loss": 0.5093,
+      "step": 21
+    },
+    {
+      "epoch": 0.292358803986711,
+      "grad_norm": 0.5019382238388062,
+      "learning_rate": 0.00018412535328311814,
+      "loss": 0.6098,
+      "step": 22
+    },
+    {
+      "epoch": 0.30564784053156147,
+      "grad_norm": 0.6148894429206848,
+      "learning_rate": 0.00018145759520503358,
+      "loss": 0.7219,
+      "step": 23
+    },
+    {
+      "epoch": 0.31893687707641194,
+      "grad_norm": 0.5921617150306702,
+      "learning_rate": 0.00017860530947427875,
+      "loss": 0.8486,
+      "step": 24
+    },
+    {
+      "epoch": 0.33222591362126247,
+      "grad_norm": 0.4521740674972534,
+      "learning_rate": 0.00017557495743542585,
+      "loss": 0.4504,
+      "step": 25
+    },
+    {
+      "epoch": 0.34551495016611294,
+      "grad_norm": 0.5427228808403015,
+      "learning_rate": 0.00017237340381050703,
+      "loss": 0.5575,
+      "step": 26
+    },
+    {
+      "epoch": 0.3588039867109635,
+      "grad_norm": 0.466899037361145,
+      "learning_rate": 0.00016900790114821122,
+      "loss": 0.7179,
+      "step": 27
+    },
+    {
+      "epoch": 0.37209302325581395,
+      "grad_norm": 0.4334196448326111,
+      "learning_rate": 0.00016548607339452853,
+      "loss": 0.6399,
+      "step": 28
+    },
+    {
+      "epoch": 0.3853820598006645,
+      "grad_norm": 0.39604687690734863,
+      "learning_rate": 0.00016181589862206052,
+      "loss": 0.4078,
+      "step": 29
+    },
+    {
+      "epoch": 0.39867109634551495,
+      "grad_norm": 0.4122071862220764,
+      "learning_rate": 0.00015800569095711982,
+      "loss": 0.4967,
+      "step": 30
+    },
+    {
+      "epoch": 0.4119601328903654,
+      "grad_norm": 0.473812997341156,
+      "learning_rate": 0.00015406408174555976,
+      "loss": 0.7088,
+      "step": 31
+    },
+    {
+      "epoch": 0.42524916943521596,
+      "grad_norm": 0.5842433571815491,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 0.7418,
+      "step": 32
+    },
+    {
+      "epoch": 0.43853820598006643,
+      "grad_norm": 0.5841939449310303,
+      "learning_rate": 0.00014582265217274104,
+      "loss": 0.761,
+      "step": 33
+    },
+    {
+      "epoch": 0.45182724252491696,
+      "grad_norm": 0.3879057765007019,
+      "learning_rate": 0.00014154150130018866,
+      "loss": 0.4902,
+      "step": 34
+    },
+    {
+      "epoch": 0.46511627906976744,
+      "grad_norm": 0.6922730803489685,
+      "learning_rate": 0.00013716624556603274,
+      "loss": 0.7617,
+      "step": 35
+    },
+    {
+      "epoch": 0.47840531561461797,
+      "grad_norm": 0.5831931233406067,
+      "learning_rate": 0.00013270679633174218,
+      "loss": 0.8443,
+      "step": 36
+    },
+    {
+      "epoch": 0.49169435215946844,
+      "grad_norm": 0.5058356523513794,
+      "learning_rate": 0.00012817325568414297,
+      "loss": 0.6578,
+      "step": 37
+    },
+    {
+      "epoch": 0.5049833887043189,
+      "grad_norm": 0.3720795512199402,
+      "learning_rate": 0.00012357589355094275,
+      "loss": 0.5019,
+      "step": 38
+    },
+    {
+      "epoch": 0.5049833887043189,
+      "eval_loss": 0.690031886100769,
+      "eval_runtime": 0.9404,
+      "eval_samples_per_second": 34.029,
+      "eval_steps_per_second": 17.015,
+      "step": 38
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3157187360194560.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null