smirki commited on
Commit
538d784
·
verified ·
1 Parent(s): 53fa85e

Training in progress, step 350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51d2bfe427ae3327bc33ac81631f2aa644863894dc2053c7a9baffcc189de959
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:659d6e1779ca606e9015006ef5de68aba0e8374af8c18cedc60c33a7bf451e5c
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a853020d3765589fb3bb1ff546e98661c228054d39f41c9223f630545c6ea646
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1671a35686771c80a687de1f74c1a702fdc2a1104adb9e2bf4d994f98392f085
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11d2e7595e9ba9bfe2c796cf0b04f0964398eb3358cd06be11438c9c6d1e9663
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8306b273eeea1e7d7babc48af3074feb456a19148c20b9d256b84d0476e8efcc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb505857bd564bb81d4dfe3d81be04ff6342c0ec4fa225ccc8dfd554aa405567
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b017f223f7cbfcbd91ec9cfbd378a9bc9c27e3fa5227c019815cc49a05577b2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.002588383335589873,
5
  "eval_steps": 500,
6
- "global_step": 325,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -391,6 +391,42 @@
391
  "reward_std": 0.20657491832971572,
392
  "rewards/custom_reward_logic_v4_batch_streak_dblog": 0.26712499260902406,
393
  "step": 320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
  }
395
  ],
396
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.002787489746019863,
5
  "eval_steps": 500,
6
+ "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
391
  "reward_std": 0.20657491832971572,
392
  "rewards/custom_reward_logic_v4_batch_streak_dblog": 0.26712499260902406,
393
  "step": 320
394
+ },
395
+ {
396
+ "completion_length": 18.91875,
397
+ "epoch": 0.002628204617675871,
398
+ "grad_norm": 1.5010004043579102,
399
+ "kl": 0.4104623213410378,
400
+ "learning_rate": 4.978612153434527e-06,
401
+ "loss": 0.0164,
402
+ "reward": 0.3325000023469329,
403
+ "reward_std": 0.2386787176132202,
404
+ "rewards/custom_reward_logic_v4_batch_streak_dblog": 0.3325000023469329,
405
+ "step": 330
406
+ },
407
+ {
408
+ "completion_length": 17.9875,
409
+ "epoch": 0.002707847181847867,
410
+ "grad_norm": 3.6247098445892334,
411
+ "kl": 0.45523770749568937,
412
+ "learning_rate": 4.973604096452361e-06,
413
+ "loss": 0.0182,
414
+ "reward": 0.19924999605864285,
415
+ "reward_std": 0.153281569480896,
416
+ "rewards/custom_reward_logic_v4_batch_streak_dblog": 0.19924999605864285,
417
+ "step": 340
418
+ },
419
+ {
420
+ "completion_length": 19.43125,
421
+ "epoch": 0.002787489746019863,
422
+ "grad_norm": 0.7098278403282166,
423
+ "kl": 0.3987171895802021,
424
+ "learning_rate": 4.968072782793436e-06,
425
+ "loss": 0.0159,
426
+ "reward": 0.33292500320822,
427
+ "reward_std": 0.07614715248346329,
428
+ "rewards/custom_reward_logic_v4_batch_streak_dblog": 0.33292500320822,
429
+ "step": 350
430
  }
431
  ],
432
  "logging_steps": 10,