Training in progress, step 600, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06b103d26d07c4ed42fbee8ace2fe6573cc33fa4640fc5ac8cdb9f08c6422c37
 size 349243752

 version https://git-lfs.github.com/spec/v1
+oid sha256:2634d7d437e55d120f9cbfc1a0a647ba5e43707198ec229840f3cbd4c2c6010d
 size 349243752

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c971da1af950d6b26f168fac122add84f133c26a319b4d172666d21bb888d4e9
 size 177909253

 version https://git-lfs.github.com/spec/v1
+oid sha256:77d6a6963848ad083ec9f75340262560feabd272bc333bcca7686444ebcc3703
 size 177909253

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68059c8080c3be6efde532e424e65c8e3d05b6e6e4a6bc5308804f229fa094d9
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb0ee16dc80f0a35b0d265d0738a01e60e4a6bc16d147140edaf591ee6a31fd0
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cfb0202afd5938a45bdfdc38b8ac97c84ae18383bac3b871da4549cec70e22e
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:326d14f5288c1d48dfb46fecae7e18839fcfd032bd3fac00e22ed4ca25616087
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4975124378109453,
   "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -358,6 +358,76 @@
       "learning_rate": 1.196412859476037e-05,
       "loss": 1.4087,
       "step": 500
     }
   ],
   "logging_steps": 10,
@@ -377,7 +447,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.76010719956566e+17,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5970149253731343,
   "eval_steps": 500,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.196412859476037e-05,
       "loss": 1.4087,
       "step": 500
+    },
+    {
+      "epoch": 0.5074626865671642,
+      "grad_norm": 0.15541517734527588,
+      "learning_rate": 1.1961691644685907e-05,
+      "loss": 1.3638,
+      "step": 510
+    },
+    {
+      "epoch": 0.5174129353233831,
+      "grad_norm": 0.14922229945659637,
+      "learning_rate": 1.195917487915494e-05,
+      "loss": 1.4186,
+      "step": 520
+    },
+    {
+      "epoch": 0.527363184079602,
+      "grad_norm": 0.1509064882993698,
+      "learning_rate": 1.1956578331862066e-05,
+      "loss": 1.3244,
+      "step": 530
+    },
+    {
+      "epoch": 0.5373134328358209,
+      "grad_norm": 0.15153075754642487,
+      "learning_rate": 1.1953902037570002e-05,
+      "loss": 1.3692,
+      "step": 540
+    },
+    {
+      "epoch": 0.5472636815920398,
+      "grad_norm": 0.18627804517745972,
+      "learning_rate": 1.1951146032109126e-05,
+      "loss": 1.3707,
+      "step": 550
+    },
+    {
+      "epoch": 0.5572139303482587,
+      "grad_norm": 0.1417003720998764,
+      "learning_rate": 1.1948310352376988e-05,
+      "loss": 1.3976,
+      "step": 560
+    },
+    {
+      "epoch": 0.5671641791044776,
+      "grad_norm": 0.14316676557064056,
+      "learning_rate": 1.1945395036337829e-05,
+      "loss": 1.4397,
+      "step": 570
+    },
+    {
+      "epoch": 0.5771144278606966,
+      "grad_norm": 0.11908440291881561,
+      "learning_rate": 1.1942400123022057e-05,
+      "loss": 1.3614,
+      "step": 580
+    },
+    {
+      "epoch": 0.5870646766169154,
+      "grad_norm": 0.15239761769771576,
+      "learning_rate": 1.1939325652525737e-05,
+      "loss": 1.4346,
+      "step": 590
+    },
+    {
+      "epoch": 0.5970149253731343,
+      "grad_norm": 0.14442642033100128,
+      "learning_rate": 1.193617166601005e-05,
+      "loss": 1.3687,
+      "step": 600
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.5084597833170944e+17,
   "train_batch_size": 24,
   "trial_name": null,
   "trial_params": null