Training in progress, step 2375, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 479005064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf8a4840744445d7b5c7b194ce939998d6352bc50adf06a5d078481ee8297373
|
3 |
size 479005064
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 958299770
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b9dc156f7ab8d8c101ab11f89633c51a3e1ef772e6f7e5bdbdb528550c82290
|
3 |
size 958299770
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f8e4772e690bdadd5a1f02432bda81d45b48b1b69475cf388770c2e827ad5db
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bf8199a4f0174f0fea821cfd5e9428ebb987d6fd85e497fec1ce048c10e54b7
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2827,6 +2827,30 @@
|
|
2827 |
"reward_std": 0.6644300162792206,
|
2828 |
"rewards/custom_reward_logic_v4_batch_streak_dblog": 1.0077000059187413,
|
2829 |
"step": 2350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2830 |
}
|
2831 |
],
|
2832 |
"logging_steps": 10,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.01891510899084907,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2375,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2827 |
"reward_std": 0.6644300162792206,
|
2828 |
"rewards/custom_reward_logic_v4_batch_streak_dblog": 1.0077000059187413,
|
2829 |
"step": 2350
|
2830 |
+
},
|
2831 |
+
{
|
2832 |
+
"completion_length": 669.8,
|
2833 |
+
"epoch": 0.018795645144591075,
|
2834 |
+
"grad_norm": 0.27531981468200684,
|
2835 |
+
"kl": 2.2293329825624824,
|
2836 |
+
"learning_rate": 4.229604321829561e-09,
|
2837 |
+
"loss": 0.0892,
|
2838 |
+
"reward": 1.234949996136129,
|
2839 |
+
"reward_std": 0.5681714907288551,
|
2840 |
+
"rewards/custom_reward_logic_v4_batch_streak_dblog": 1.234949996136129,
|
2841 |
+
"step": 2360
|
2842 |
+
},
|
2843 |
+
{
|
2844 |
+
"completion_length": 708.00625,
|
2845 |
+
"epoch": 0.01887528770876307,
|
2846 |
+
"grad_norm": 1.1700477600097656,
|
2847 |
+
"kl": 1.8451879311352968,
|
2848 |
+
"learning_rate": 2.3794460453555046e-09,
|
2849 |
+
"loss": 0.0738,
|
2850 |
+
"reward": 1.284224995970726,
|
2851 |
+
"reward_std": 0.6412660963833332,
|
2852 |
+
"rewards/custom_reward_logic_v4_batch_streak_dblog": 1.284224995970726,
|
2853 |
+
"step": 2370
|
2854 |
}
|
2855 |
],
|
2856 |
"logging_steps": 10,
|