smirki committed (verified)
Commit 0862c76 · 1 Parent(s): 867cf06

Training in progress, step 2375, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2965242f70dc1b0b7596bb3ed9c426020bbf4517ebeed2c8a2440fdbf945be50
+oid sha256:cf8a4840744445d7b5c7b194ce939998d6352bc50adf06a5d078481ee8297373
 size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0faf19b75c250e062a7d5e1c008f697e26ff098a62aa8d568ece83819eebe8c8
+oid sha256:4b9dc156f7ab8d8c101ab11f89633c51a3e1ef772e6f7e5bdbdb528550c82290
 size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3407d6a4fde3184e3536d76d706c2b1d771d2787d50884792a8278c57e98e4d6
+oid sha256:9f8e4772e690bdadd5a1f02432bda81d45b48b1b69475cf388770c2e827ad5db
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ac1ae36b6fe28bd1bd87cf104833c636a87321c3eebcde0b251d4ada5179d85
+oid sha256:3bf8199a4f0174f0fea821cfd5e9428ebb987d6fd85e497fec1ce048c10e54b7
 size 1064
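
The four files above are stored as Git LFS pointers: a three-line text stub (version, sha256 oid, byte size) that stands in for the actual checkpoint binary. A minimal sketch of how one could verify that a downloaded binary matches its pointer is shown below; the pointer text is taken from this diff, while the local path "last-checkpoint/adapter_model.safetensors" is an assumption about where the resolved file lives on disk.

```python
# Minimal sketch: check a downloaded checkpoint file against its Git LFS pointer.
# Uses only the standard library; paths are illustrative, not taken from this repo's tooling.
import hashlib
import os

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Parse the three-line pointer format: 'version ...', 'oid sha256:<hex>', 'size <bytes>'."""
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify(binary_path: str, pointer_text: str, chunk_size: int = 1 << 20) -> bool:
    fields = parse_lfs_pointer(pointer_text)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    # Cheap check first: the byte size recorded in the pointer.
    if os.path.getsize(binary_path) != expected_size:
        return False
    # Then hash the file in chunks and compare against the recorded oid.
    digest = hashlib.sha256()
    with open(binary_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:cf8a4840744445d7b5c7b194ce939998d6352bc50adf06a5d078481ee8297373
size 479005064"""
print(verify("last-checkpoint/adapter_model.safetensors", pointer))
```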
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01871600258041908,
+  "epoch": 0.01891510899084907,
   "eval_steps": 500,
-  "global_step": 2350,
+  "global_step": 2375,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2827,6 +2827,30 @@
       "reward_std": 0.6644300162792206,
       "rewards/custom_reward_logic_v4_batch_streak_dblog": 1.0077000059187413,
       "step": 2350
+    },
+    {
+      "completion_length": 669.8,
+      "epoch": 0.018795645144591075,
+      "grad_norm": 0.27531981468200684,
+      "kl": 2.2293329825624824,
+      "learning_rate": 4.229604321829561e-09,
+      "loss": 0.0892,
+      "reward": 1.234949996136129,
+      "reward_std": 0.5681714907288551,
+      "rewards/custom_reward_logic_v4_batch_streak_dblog": 1.234949996136129,
+      "step": 2360
+    },
+    {
+      "completion_length": 708.00625,
+      "epoch": 0.01887528770876307,
+      "grad_norm": 1.1700477600097656,
+      "kl": 1.8451879311352968,
+      "learning_rate": 2.3794460453555046e-09,
+      "loss": 0.0738,
+      "reward": 1.284224995970726,
+      "reward_std": 0.6412660963833332,
+      "rewards/custom_reward_logic_v4_batch_streak_dblog": 1.284224995970726,
+      "step": 2370
     }
   ],
   "logging_steps": 10,