AmberYifan commited on
Commit
e9b24c9
·
verified ·
1 Parent(s): 5d44fb2

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/global_step3744/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10aa22d0b86834e271c2260d25e9d0e6a2220dae19d9787828e4348b1421e2f7
3
+ size 30462473157
last-checkpoint/global_step3744/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b1dd7b8f42c293ec88f22157ab743fda1e47b9342f250d6719a9f6f39b8d778
3
+ size 30462473157
last-checkpoint/global_step3744/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ac3896df02945578505e5be29a4b6b7c9a2e582aaed23c9ae20b3db17eab617
3
+ size 168021
last-checkpoint/global_step3744/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a000715579fa90fb876d81d3a862e534315f1dcd52c014cd773aa4327cab5466
3
+ size 168021
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2496
 
1
+ global_step3744
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fc827e0e29fe68064287084a789c280ee07071edfaf802494bfd45a4328d9b4
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5177a8e6782bb88bda2135ebe4b556b112bb813fd6ff6a0be384c71f8e0189d6
3
  size 4877660776
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17eff878abe01ff745e70abc42b2798b23c5cca5a165c37c10a14930bca0d0c6
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc8950fe3195f952346a0b52c6dfe6413f42e8dbbef2ad1916bd02c754b27342
3
  size 4932751008
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:614d95265342337ab7854c0fec6ee9677c78a2d9c93f081a8c779cc5a910acd8
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb232eefc76f6ffc2aa58fe324177383ef7b1c3560473ced7dfbf1ccf253d47
3
  size 4330865200
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a018f26b180a313b65824cdc68761f2b6f1700c2d360dfc69b6300fcb5e461b
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cabea12f73243814a74bfd1b213f64fb4c6348bad93b08cd5641d428fd19614e
3
  size 1089994880
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9698021f2d84167912e7be6ba48d3d2b8d6b20894f23319f36df078c03b33a64
3
  size 14768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95430508d31bbe1a66a940e2572bf04addefae3e1c4e861e8657f66d302aa23e
3
  size 14768
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90a140d1d010220b1679bf6e519f8d3d518cb57331e0e7fb30008dc00e427811
3
  size 14768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b70a6983e6085768b4d2b447a8bd35374bce9cf4ea8e8fbefc1260ca2e054a70
3
  size 14768
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76861627b7d29ad5e6036c31e8897f76435dd322d24f5d6f9e3f8afd8fab8ced
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa515306b1c7f0b288d157b662096d041c14c3bbec98c937898173d64b5dfffa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 2496,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3789,6 +3789,1897 @@
3789
  "eval_samples_per_second": 7.686,
3790
  "eval_steps_per_second": 0.966,
3791
  "step": 2496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3792
  }
3793
  ],
3794
  "logging_steps": 10,
@@ -3803,7 +5694,7 @@
3803
  "should_evaluate": false,
3804
  "should_log": false,
3805
  "should_save": true,
3806
- "should_training_stop": false
3807
  },
3808
  "attributes": {}
3809
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 3744,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3789
  "eval_samples_per_second": 7.686,
3790
  "eval_steps_per_second": 0.966,
3791
  "step": 2496
3792
+ },
3793
+ {
3794
+ "epoch": 2.003205128205128,
3795
+ "grad_norm": 4.521723680059775e-05,
3796
+ "learning_rate": 1.84624517661027e-07,
3797
+ "logits/chosen": 0.8828125,
3798
+ "logits/rejected": 1.203125,
3799
+ "logps/chosen": -166.0,
3800
+ "logps/rejected": -320.0,
3801
+ "loss": 0.0,
3802
+ "rewards/accuracies": 1.0,
3803
+ "rewards/chosen": 3.625,
3804
+ "rewards/margins": 22.875,
3805
+ "rewards/rejected": -19.25,
3806
+ "step": 2500
3807
+ },
3808
+ {
3809
+ "epoch": 2.011217948717949,
3810
+ "grad_norm": 3.030267966817534e-06,
3811
+ "learning_rate": 1.8314039774413774e-07,
3812
+ "logits/chosen": 0.640625,
3813
+ "logits/rejected": 1.1171875,
3814
+ "logps/chosen": -195.0,
3815
+ "logps/rejected": -332.0,
3816
+ "loss": 0.0,
3817
+ "rewards/accuracies": 1.0,
3818
+ "rewards/chosen": 3.28125,
3819
+ "rewards/margins": 23.125,
3820
+ "rewards/rejected": -19.875,
3821
+ "step": 2510
3822
+ },
3823
+ {
3824
+ "epoch": 2.019230769230769,
3825
+ "grad_norm": 6.205803823998353e-07,
3826
+ "learning_rate": 1.8165627782724842e-07,
3827
+ "logits/chosen": 0.796875,
3828
+ "logits/rejected": 1.1328125,
3829
+ "logps/chosen": -141.0,
3830
+ "logps/rejected": -340.0,
3831
+ "loss": 0.0,
3832
+ "rewards/accuracies": 1.0,
3833
+ "rewards/chosen": 3.3125,
3834
+ "rewards/margins": 23.875,
3835
+ "rewards/rejected": -20.5,
3836
+ "step": 2520
3837
+ },
3838
+ {
3839
+ "epoch": 2.02724358974359,
3840
+ "grad_norm": 1.5474883916264918e-05,
3841
+ "learning_rate": 1.8017215791035914e-07,
3842
+ "logits/chosen": 0.87109375,
3843
+ "logits/rejected": 1.375,
3844
+ "logps/chosen": -172.0,
3845
+ "logps/rejected": -338.0,
3846
+ "loss": 0.0,
3847
+ "rewards/accuracies": 1.0,
3848
+ "rewards/chosen": 3.21875,
3849
+ "rewards/margins": 23.25,
3850
+ "rewards/rejected": -20.125,
3851
+ "step": 2530
3852
+ },
3853
+ {
3854
+ "epoch": 2.03525641025641,
3855
+ "grad_norm": 3.136476138411595e-06,
3856
+ "learning_rate": 1.7868803799346988e-07,
3857
+ "logits/chosen": 0.9140625,
3858
+ "logits/rejected": 1.2578125,
3859
+ "logps/chosen": -159.0,
3860
+ "logps/rejected": -326.0,
3861
+ "loss": 0.0,
3862
+ "rewards/accuracies": 1.0,
3863
+ "rewards/chosen": 3.890625,
3864
+ "rewards/margins": 23.0,
3865
+ "rewards/rejected": -19.125,
3866
+ "step": 2540
3867
+ },
3868
+ {
3869
+ "epoch": 2.043269230769231,
3870
+ "grad_norm": 2.0185773664202382e-07,
3871
+ "learning_rate": 1.772039180765806e-07,
3872
+ "logits/chosen": 0.8203125,
3873
+ "logits/rejected": 1.46875,
3874
+ "logps/chosen": -157.0,
3875
+ "logps/rejected": -316.0,
3876
+ "loss": 0.0,
3877
+ "rewards/accuracies": 1.0,
3878
+ "rewards/chosen": 3.96875,
3879
+ "rewards/margins": 22.0,
3880
+ "rewards/rejected": -18.0,
3881
+ "step": 2550
3882
+ },
3883
+ {
3884
+ "epoch": 2.051282051282051,
3885
+ "grad_norm": 6.762579805129387e-06,
3886
+ "learning_rate": 1.757197981596913e-07,
3887
+ "logits/chosen": 0.6953125,
3888
+ "logits/rejected": 1.2578125,
3889
+ "logps/chosen": -175.0,
3890
+ "logps/rejected": -320.0,
3891
+ "loss": 0.0,
3892
+ "rewards/accuracies": 1.0,
3893
+ "rewards/chosen": 3.828125,
3894
+ "rewards/margins": 22.375,
3895
+ "rewards/rejected": -18.5,
3896
+ "step": 2560
3897
+ },
3898
+ {
3899
+ "epoch": 2.059294871794872,
3900
+ "grad_norm": 1.272394096308946e-06,
3901
+ "learning_rate": 1.74235678242802e-07,
3902
+ "logits/chosen": 0.9921875,
3903
+ "logits/rejected": 1.2734375,
3904
+ "logps/chosen": -168.0,
3905
+ "logps/rejected": -338.0,
3906
+ "loss": 0.0,
3907
+ "rewards/accuracies": 1.0,
3908
+ "rewards/chosen": 3.6875,
3909
+ "rewards/margins": 23.125,
3910
+ "rewards/rejected": -19.5,
3911
+ "step": 2570
3912
+ },
3913
+ {
3914
+ "epoch": 2.0673076923076925,
3915
+ "grad_norm": 0.00022224801984980412,
3916
+ "learning_rate": 1.7275155832591273e-07,
3917
+ "logits/chosen": 0.86328125,
3918
+ "logits/rejected": 1.2734375,
3919
+ "logps/chosen": -124.0,
3920
+ "logps/rejected": -326.0,
3921
+ "loss": 0.0,
3922
+ "rewards/accuracies": 1.0,
3923
+ "rewards/chosen": 3.484375,
3924
+ "rewards/margins": 23.5,
3925
+ "rewards/rejected": -20.0,
3926
+ "step": 2580
3927
+ },
3928
+ {
3929
+ "epoch": 2.0753205128205128,
3930
+ "grad_norm": 5.872515304133134e-06,
3931
+ "learning_rate": 1.7126743840902344e-07,
3932
+ "logits/chosen": 0.9140625,
3933
+ "logits/rejected": 1.15625,
3934
+ "logps/chosen": -160.0,
3935
+ "logps/rejected": -318.0,
3936
+ "loss": 0.0001,
3937
+ "rewards/accuracies": 1.0,
3938
+ "rewards/chosen": 3.734375,
3939
+ "rewards/margins": 22.5,
3940
+ "rewards/rejected": -18.875,
3941
+ "step": 2590
3942
+ },
3943
+ {
3944
+ "epoch": 2.0833333333333335,
3945
+ "grad_norm": 0.0015141429378734963,
3946
+ "learning_rate": 1.6978331849213416e-07,
3947
+ "logits/chosen": 0.7578125,
3948
+ "logits/rejected": 1.1484375,
3949
+ "logps/chosen": -183.0,
3950
+ "logps/rejected": -336.0,
3951
+ "loss": 0.0,
3952
+ "rewards/accuracies": 1.0,
3953
+ "rewards/chosen": 3.328125,
3954
+ "rewards/margins": 23.125,
3955
+ "rewards/rejected": -19.875,
3956
+ "step": 2600
3957
+ },
3958
+ {
3959
+ "epoch": 2.0913461538461537,
3960
+ "grad_norm": 2.991210518482803e-06,
3961
+ "learning_rate": 1.6829919857524487e-07,
3962
+ "logits/chosen": 0.81640625,
3963
+ "logits/rejected": 0.8671875,
3964
+ "logps/chosen": -140.0,
3965
+ "logps/rejected": -342.0,
3966
+ "loss": 0.0,
3967
+ "rewards/accuracies": 1.0,
3968
+ "rewards/chosen": 2.96875,
3969
+ "rewards/margins": 23.5,
3970
+ "rewards/rejected": -20.5,
3971
+ "step": 2610
3972
+ },
3973
+ {
3974
+ "epoch": 2.0993589743589745,
3975
+ "grad_norm": 5.742496354978091e-06,
3976
+ "learning_rate": 1.6681507865835558e-07,
3977
+ "logits/chosen": 0.6953125,
3978
+ "logits/rejected": 1.3203125,
3979
+ "logps/chosen": -186.0,
3980
+ "logps/rejected": -338.0,
3981
+ "loss": 0.0,
3982
+ "rewards/accuracies": 1.0,
3983
+ "rewards/chosen": 3.265625,
3984
+ "rewards/margins": 23.125,
3985
+ "rewards/rejected": -19.875,
3986
+ "step": 2620
3987
+ },
3988
+ {
3989
+ "epoch": 2.1073717948717947,
3990
+ "grad_norm": 3.757183821571958e-05,
3991
+ "learning_rate": 1.653309587414663e-07,
3992
+ "logits/chosen": 0.98046875,
3993
+ "logits/rejected": 1.3046875,
3994
+ "logps/chosen": -150.0,
3995
+ "logps/rejected": -316.0,
3996
+ "loss": 0.0,
3997
+ "rewards/accuracies": 1.0,
3998
+ "rewards/chosen": 3.46875,
3999
+ "rewards/margins": 21.875,
4000
+ "rewards/rejected": -18.375,
4001
+ "step": 2630
4002
+ },
4003
+ {
4004
+ "epoch": 2.1153846153846154,
4005
+ "grad_norm": 1.1309302661788921e-05,
4006
+ "learning_rate": 1.63846838824577e-07,
4007
+ "logits/chosen": 0.796875,
4008
+ "logits/rejected": 1.21875,
4009
+ "logps/chosen": -195.0,
4010
+ "logps/rejected": -330.0,
4011
+ "loss": 0.0,
4012
+ "rewards/accuracies": 1.0,
4013
+ "rewards/chosen": 3.015625,
4014
+ "rewards/margins": 22.125,
4015
+ "rewards/rejected": -19.125,
4016
+ "step": 2640
4017
+ },
4018
+ {
4019
+ "epoch": 2.123397435897436,
4020
+ "grad_norm": 0.00011609396836253147,
4021
+ "learning_rate": 1.6236271890768775e-07,
4022
+ "logits/chosen": 0.8828125,
4023
+ "logits/rejected": 1.578125,
4024
+ "logps/chosen": -143.0,
4025
+ "logps/rejected": -334.0,
4026
+ "loss": 0.0,
4027
+ "rewards/accuracies": 1.0,
4028
+ "rewards/chosen": 3.53125,
4029
+ "rewards/margins": 23.125,
4030
+ "rewards/rejected": -19.625,
4031
+ "step": 2650
4032
+ },
4033
+ {
4034
+ "epoch": 2.1314102564102564,
4035
+ "grad_norm": 2.37606345324713e-07,
4036
+ "learning_rate": 1.6087859899079844e-07,
4037
+ "logits/chosen": 0.703125,
4038
+ "logits/rejected": 1.40625,
4039
+ "logps/chosen": -175.0,
4040
+ "logps/rejected": -334.0,
4041
+ "loss": 0.0,
4042
+ "rewards/accuracies": 1.0,
4043
+ "rewards/chosen": 3.71875,
4044
+ "rewards/margins": 24.0,
4045
+ "rewards/rejected": -20.25,
4046
+ "step": 2660
4047
+ },
4048
+ {
4049
+ "epoch": 2.139423076923077,
4050
+ "grad_norm": 1.7790769529114564e-05,
4051
+ "learning_rate": 1.5939447907390915e-07,
4052
+ "logits/chosen": 0.59765625,
4053
+ "logits/rejected": 1.21875,
4054
+ "logps/chosen": -207.0,
4055
+ "logps/rejected": -352.0,
4056
+ "loss": 0.0,
4057
+ "rewards/accuracies": 1.0,
4058
+ "rewards/chosen": 3.453125,
4059
+ "rewards/margins": 23.625,
4060
+ "rewards/rejected": -20.25,
4061
+ "step": 2670
4062
+ },
4063
+ {
4064
+ "epoch": 2.1474358974358974,
4065
+ "grad_norm": 1.2324246884581975e-07,
4066
+ "learning_rate": 1.579103591570199e-07,
4067
+ "logits/chosen": 0.9296875,
4068
+ "logits/rejected": 1.3828125,
4069
+ "logps/chosen": -185.0,
4070
+ "logps/rejected": -334.0,
4071
+ "loss": 0.0,
4072
+ "rewards/accuracies": 1.0,
4073
+ "rewards/chosen": 3.34375,
4074
+ "rewards/margins": 23.125,
4075
+ "rewards/rejected": -19.75,
4076
+ "step": 2680
4077
+ },
4078
+ {
4079
+ "epoch": 2.155448717948718,
4080
+ "grad_norm": 8.415390201667563e-05,
4081
+ "learning_rate": 1.564262392401306e-07,
4082
+ "logits/chosen": 0.828125,
4083
+ "logits/rejected": 1.421875,
4084
+ "logps/chosen": -173.0,
4085
+ "logps/rejected": -338.0,
4086
+ "loss": 0.0,
4087
+ "rewards/accuracies": 1.0,
4088
+ "rewards/chosen": 3.5625,
4089
+ "rewards/margins": 24.125,
4090
+ "rewards/rejected": -20.5,
4091
+ "step": 2690
4092
+ },
4093
+ {
4094
+ "epoch": 2.1634615384615383,
4095
+ "grad_norm": 0.0011339204404605804,
4096
+ "learning_rate": 1.549421193232413e-07,
4097
+ "logits/chosen": 0.82421875,
4098
+ "logits/rejected": 1.15625,
4099
+ "logps/chosen": -168.0,
4100
+ "logps/rejected": -342.0,
4101
+ "loss": 0.0,
4102
+ "rewards/accuracies": 1.0,
4103
+ "rewards/chosen": 3.578125,
4104
+ "rewards/margins": 23.875,
4105
+ "rewards/rejected": -20.25,
4106
+ "step": 2700
4107
+ },
4108
+ {
4109
+ "epoch": 2.171474358974359,
4110
+ "grad_norm": 6.851096339481569e-06,
4111
+ "learning_rate": 1.5345799940635203e-07,
4112
+ "logits/chosen": 0.53125,
4113
+ "logits/rejected": 1.2734375,
4114
+ "logps/chosen": -197.0,
4115
+ "logps/rejected": -348.0,
4116
+ "loss": 0.0,
4117
+ "rewards/accuracies": 1.0,
4118
+ "rewards/chosen": 3.328125,
4119
+ "rewards/margins": 24.25,
4120
+ "rewards/rejected": -20.875,
4121
+ "step": 2710
4122
+ },
4123
+ {
4124
+ "epoch": 2.1794871794871793,
4125
+ "grad_norm": 0.0003726374461810356,
4126
+ "learning_rate": 1.5197387948946275e-07,
4127
+ "logits/chosen": 1.03125,
4128
+ "logits/rejected": 1.453125,
4129
+ "logps/chosen": -169.0,
4130
+ "logps/rejected": -320.0,
4131
+ "loss": 0.0,
4132
+ "rewards/accuracies": 1.0,
4133
+ "rewards/chosen": 3.8125,
4134
+ "rewards/margins": 22.5,
4135
+ "rewards/rejected": -18.75,
4136
+ "step": 2720
4137
+ },
4138
+ {
4139
+ "epoch": 2.1875,
4140
+ "grad_norm": 2.8940339945841385e-05,
4141
+ "learning_rate": 1.5048975957257346e-07,
4142
+ "logits/chosen": 0.78125,
4143
+ "logits/rejected": 1.1171875,
4144
+ "logps/chosen": -168.0,
4145
+ "logps/rejected": -340.0,
4146
+ "loss": 0.0,
4147
+ "rewards/accuracies": 1.0,
4148
+ "rewards/chosen": 2.96875,
4149
+ "rewards/margins": 23.125,
4150
+ "rewards/rejected": -20.125,
4151
+ "step": 2730
4152
+ },
4153
+ {
4154
+ "epoch": 2.1955128205128207,
4155
+ "grad_norm": 0.002859780208342844,
4156
+ "learning_rate": 1.4900563965568417e-07,
4157
+ "logits/chosen": 0.8671875,
4158
+ "logits/rejected": 1.4140625,
4159
+ "logps/chosen": -171.0,
4160
+ "logps/rejected": -312.0,
4161
+ "loss": 0.0,
4162
+ "rewards/accuracies": 1.0,
4163
+ "rewards/chosen": 3.453125,
4164
+ "rewards/margins": 21.0,
4165
+ "rewards/rejected": -17.625,
4166
+ "step": 2740
4167
+ },
4168
+ {
4169
+ "epoch": 2.203525641025641,
4170
+ "grad_norm": 8.642451938502949e-05,
4171
+ "learning_rate": 1.4752151973879489e-07,
4172
+ "logits/chosen": 0.92578125,
4173
+ "logits/rejected": 1.0703125,
4174
+ "logps/chosen": -152.0,
4175
+ "logps/rejected": -322.0,
4176
+ "loss": 0.0,
4177
+ "rewards/accuracies": 1.0,
4178
+ "rewards/chosen": 3.625,
4179
+ "rewards/margins": 23.125,
4180
+ "rewards/rejected": -19.5,
4181
+ "step": 2750
4182
+ },
4183
+ {
4184
+ "epoch": 2.2115384615384617,
4185
+ "grad_norm": 0.004973797903281045,
4186
+ "learning_rate": 1.460373998219056e-07,
4187
+ "logits/chosen": 1.171875,
4188
+ "logits/rejected": 1.5859375,
4189
+ "logps/chosen": -154.0,
4190
+ "logps/rejected": -312.0,
4191
+ "loss": 0.0,
4192
+ "rewards/accuracies": 1.0,
4193
+ "rewards/chosen": 3.296875,
4194
+ "rewards/margins": 22.625,
4195
+ "rewards/rejected": -19.25,
4196
+ "step": 2760
4197
+ },
4198
+ {
4199
+ "epoch": 2.219551282051282,
4200
+ "grad_norm": 0.0007041585520470051,
4201
+ "learning_rate": 1.4455327990501631e-07,
4202
+ "logits/chosen": 0.796875,
4203
+ "logits/rejected": 1.015625,
4204
+ "logps/chosen": -140.0,
4205
+ "logps/rejected": -338.0,
4206
+ "loss": 0.0,
4207
+ "rewards/accuracies": 1.0,
4208
+ "rewards/chosen": 3.421875,
4209
+ "rewards/margins": 24.75,
4210
+ "rewards/rejected": -21.25,
4211
+ "step": 2770
4212
+ },
4213
+ {
4214
+ "epoch": 2.2275641025641026,
4215
+ "grad_norm": 0.00045000223905260884,
4216
+ "learning_rate": 1.4306915998812705e-07,
4217
+ "logits/chosen": 1.0625,
4218
+ "logits/rejected": 1.4375,
4219
+ "logps/chosen": -171.0,
4220
+ "logps/rejected": -336.0,
4221
+ "loss": 0.0,
4222
+ "rewards/accuracies": 1.0,
4223
+ "rewards/chosen": 3.328125,
4224
+ "rewards/margins": 23.375,
4225
+ "rewards/rejected": -20.0,
4226
+ "step": 2780
4227
+ },
4228
+ {
4229
+ "epoch": 2.235576923076923,
4230
+ "grad_norm": 3.7708664164834145e-07,
4231
+ "learning_rate": 1.4158504007123777e-07,
4232
+ "logits/chosen": 0.921875,
4233
+ "logits/rejected": 1.34375,
4234
+ "logps/chosen": -188.0,
4235
+ "logps/rejected": -308.0,
4236
+ "loss": 0.0,
4237
+ "rewards/accuracies": 1.0,
4238
+ "rewards/chosen": 3.359375,
4239
+ "rewards/margins": 22.0,
4240
+ "rewards/rejected": -18.75,
4241
+ "step": 2790
4242
+ },
4243
+ {
4244
+ "epoch": 2.2435897435897436,
4245
+ "grad_norm": 2.820076649705414e-06,
4246
+ "learning_rate": 1.4010092015434845e-07,
4247
+ "logits/chosen": 0.765625,
4248
+ "logits/rejected": 1.375,
4249
+ "logps/chosen": -211.0,
4250
+ "logps/rejected": -330.0,
4251
+ "loss": 0.0,
4252
+ "rewards/accuracies": 1.0,
4253
+ "rewards/chosen": 3.703125,
4254
+ "rewards/margins": 22.625,
4255
+ "rewards/rejected": -19.0,
4256
+ "step": 2800
4257
+ },
4258
+ {
4259
+ "epoch": 2.251602564102564,
4260
+ "grad_norm": 7.476346576947421e-06,
4261
+ "learning_rate": 1.3861680023745917e-07,
4262
+ "logits/chosen": 0.96875,
4263
+ "logits/rejected": 1.171875,
4264
+ "logps/chosen": -150.0,
4265
+ "logps/rejected": -342.0,
4266
+ "loss": 0.0,
4267
+ "rewards/accuracies": 1.0,
4268
+ "rewards/chosen": 2.984375,
4269
+ "rewards/margins": 23.5,
4270
+ "rewards/rejected": -20.5,
4271
+ "step": 2810
4272
+ },
4273
+ {
4274
+ "epoch": 2.2596153846153846,
4275
+ "grad_norm": 7.318508636096753e-07,
4276
+ "learning_rate": 1.371326803205699e-07,
4277
+ "logits/chosen": 0.94140625,
4278
+ "logits/rejected": 1.4296875,
4279
+ "logps/chosen": -164.0,
4280
+ "logps/rejected": -322.0,
4281
+ "loss": 0.0,
4282
+ "rewards/accuracies": 1.0,
4283
+ "rewards/chosen": 3.46875,
4284
+ "rewards/margins": 22.625,
4285
+ "rewards/rejected": -19.125,
4286
+ "step": 2820
4287
+ },
4288
+ {
4289
+ "epoch": 2.2676282051282053,
4290
+ "grad_norm": 4.8320214702453866e-05,
4291
+ "learning_rate": 1.3564856040368062e-07,
4292
+ "logits/chosen": 0.94140625,
4293
+ "logits/rejected": 1.21875,
4294
+ "logps/chosen": -160.0,
4295
+ "logps/rejected": -332.0,
4296
+ "loss": 0.0,
4297
+ "rewards/accuracies": 1.0,
4298
+ "rewards/chosen": 3.5,
4299
+ "rewards/margins": 22.75,
4300
+ "rewards/rejected": -19.25,
4301
+ "step": 2830
4302
+ },
4303
+ {
4304
+ "epoch": 2.2756410256410255,
4305
+ "grad_norm": 5.030183317968072e-06,
4306
+ "learning_rate": 1.341644404867913e-07,
4307
+ "logits/chosen": 0.76953125,
4308
+ "logits/rejected": 1.0703125,
4309
+ "logps/chosen": -185.0,
4310
+ "logps/rejected": -328.0,
4311
+ "loss": 0.0005,
4312
+ "rewards/accuracies": 1.0,
4313
+ "rewards/chosen": 3.671875,
4314
+ "rewards/margins": 23.0,
4315
+ "rewards/rejected": -19.25,
4316
+ "step": 2840
4317
+ },
4318
+ {
4319
+ "epoch": 2.2836538461538463,
4320
+ "grad_norm": 0.00014825087080426772,
4321
+ "learning_rate": 1.3268032056990205e-07,
4322
+ "logits/chosen": 0.78515625,
4323
+ "logits/rejected": 1.3203125,
4324
+ "logps/chosen": -181.0,
4325
+ "logps/rejected": -338.0,
4326
+ "loss": 0.0,
4327
+ "rewards/accuracies": 1.0,
4328
+ "rewards/chosen": 3.796875,
4329
+ "rewards/margins": 23.5,
4330
+ "rewards/rejected": -19.75,
4331
+ "step": 2850
4332
+ },
4333
+ {
4334
+ "epoch": 2.2916666666666665,
4335
+ "grad_norm": 1.3011868588303958e-06,
4336
+ "learning_rate": 1.3119620065301276e-07,
4337
+ "logits/chosen": 1.0625,
4338
+ "logits/rejected": 1.3359375,
4339
+ "logps/chosen": -146.0,
4340
+ "logps/rejected": -332.0,
4341
+ "loss": 0.0,
4342
+ "rewards/accuracies": 1.0,
4343
+ "rewards/chosen": 3.09375,
4344
+ "rewards/margins": 23.25,
4345
+ "rewards/rejected": -20.125,
4346
+ "step": 2860
4347
+ },
4348
+ {
4349
+ "epoch": 2.2996794871794872,
4350
+ "grad_norm": 4.246488981116065e-06,
4351
+ "learning_rate": 1.2971208073612347e-07,
4352
+ "logits/chosen": 0.86328125,
4353
+ "logits/rejected": 1.4921875,
4354
+ "logps/chosen": -184.0,
4355
+ "logps/rejected": -332.0,
4356
+ "loss": 0.0,
4357
+ "rewards/accuracies": 1.0,
4358
+ "rewards/chosen": 3.578125,
4359
+ "rewards/margins": 22.875,
4360
+ "rewards/rejected": -19.25,
4361
+ "step": 2870
4362
+ },
4363
+ {
4364
+ "epoch": 2.3076923076923075,
4365
+ "grad_norm": 7.270042186092499e-06,
4366
+ "learning_rate": 1.282279608192342e-07,
4367
+ "logits/chosen": 0.66015625,
4368
+ "logits/rejected": 1.390625,
4369
+ "logps/chosen": -144.0,
4370
+ "logps/rejected": -326.0,
4371
+ "loss": 0.0,
4372
+ "rewards/accuracies": 1.0,
4373
+ "rewards/chosen": 3.203125,
4374
+ "rewards/margins": 23.75,
4375
+ "rewards/rejected": -20.5,
4376
+ "step": 2880
4377
+ },
4378
+ {
4379
+ "epoch": 2.315705128205128,
4380
+ "grad_norm": 1.4457369601091267e-08,
4381
+ "learning_rate": 1.267438409023449e-07,
4382
+ "logits/chosen": 1.0703125,
4383
+ "logits/rejected": 1.296875,
4384
+ "logps/chosen": -122.5,
4385
+ "logps/rejected": -336.0,
4386
+ "loss": 0.0,
4387
+ "rewards/accuracies": 1.0,
4388
+ "rewards/chosen": 3.015625,
4389
+ "rewards/margins": 23.375,
4390
+ "rewards/rejected": -20.25,
4391
+ "step": 2890
4392
+ },
4393
+ {
4394
+ "epoch": 2.323717948717949,
4395
+ "grad_norm": 1.2540641729400349e-06,
4396
+ "learning_rate": 1.2525972098545562e-07,
4397
+ "logits/chosen": 0.98828125,
4398
+ "logits/rejected": 1.1484375,
4399
+ "logps/chosen": -187.0,
4400
+ "logps/rejected": -330.0,
4401
+ "loss": 0.0,
4402
+ "rewards/accuracies": 1.0,
4403
+ "rewards/chosen": 3.640625,
4404
+ "rewards/margins": 22.5,
4405
+ "rewards/rejected": -18.875,
4406
+ "step": 2900
4407
+ },
4408
+ {
4409
+ "epoch": 2.331730769230769,
4410
+ "grad_norm": 1.0058948099975264e-05,
4411
+ "learning_rate": 1.2377560106856633e-07,
4412
+ "logits/chosen": 0.9140625,
4413
+ "logits/rejected": 1.296875,
4414
+ "logps/chosen": -146.0,
4415
+ "logps/rejected": -342.0,
4416
+ "loss": 0.0,
4417
+ "rewards/accuracies": 1.0,
4418
+ "rewards/chosen": 3.234375,
4419
+ "rewards/margins": 23.75,
4420
+ "rewards/rejected": -20.625,
4421
+ "step": 2910
4422
+ },
4423
+ {
4424
+ "epoch": 2.33974358974359,
4425
+ "grad_norm": 3.2768050524307317e-06,
4426
+ "learning_rate": 1.2229148115167704e-07,
4427
+ "logits/chosen": 0.87109375,
4428
+ "logits/rejected": 1.0,
4429
+ "logps/chosen": -145.0,
4430
+ "logps/rejected": -336.0,
4431
+ "loss": 0.0,
4432
+ "rewards/accuracies": 1.0,
4433
+ "rewards/chosen": 3.3125,
4434
+ "rewards/margins": 23.75,
4435
+ "rewards/rejected": -20.5,
4436
+ "step": 2920
4437
+ },
4438
+ {
4439
+ "epoch": 2.34775641025641,
4440
+ "grad_norm": 8.279411059572999e-06,
4441
+ "learning_rate": 1.2080736123478776e-07,
4442
+ "logits/chosen": 0.71484375,
4443
+ "logits/rejected": 1.296875,
4444
+ "logps/chosen": -183.0,
4445
+ "logps/rejected": -338.0,
4446
+ "loss": 0.0,
4447
+ "rewards/accuracies": 1.0,
4448
+ "rewards/chosen": 2.734375,
4449
+ "rewards/margins": 22.625,
4450
+ "rewards/rejected": -19.875,
4451
+ "step": 2930
4452
+ },
4453
+ {
4454
+ "epoch": 2.355769230769231,
4455
+ "grad_norm": 5.097273243887312e-06,
4456
+ "learning_rate": 1.1932324131789847e-07,
4457
+ "logits/chosen": 0.9375,
4458
+ "logits/rejected": 1.4453125,
4459
+ "logps/chosen": -161.0,
4460
+ "logps/rejected": -360.0,
4461
+ "loss": 0.0,
4462
+ "rewards/accuracies": 1.0,
4463
+ "rewards/chosen": 3.15625,
4464
+ "rewards/margins": 24.75,
4465
+ "rewards/rejected": -21.625,
4466
+ "step": 2940
4467
+ },
4468
+ {
4469
+ "epoch": 2.363782051282051,
4470
+ "grad_norm": 0.001364815117768273,
4471
+ "learning_rate": 1.178391214010092e-07,
4472
+ "logits/chosen": 0.82421875,
4473
+ "logits/rejected": 1.0546875,
4474
+ "logps/chosen": -196.0,
4475
+ "logps/rejected": -340.0,
4476
+ "loss": 0.0,
4477
+ "rewards/accuracies": 1.0,
4478
+ "rewards/chosen": 3.5,
4479
+ "rewards/margins": 22.5,
4480
+ "rewards/rejected": -18.875,
4481
+ "step": 2950
4482
+ },
4483
+ {
4484
+ "epoch": 2.371794871794872,
4485
+ "grad_norm": 0.00013766237352927737,
4486
+ "learning_rate": 1.1635500148411991e-07,
4487
+ "logits/chosen": 0.828125,
4488
+ "logits/rejected": 1.53125,
4489
+ "logps/chosen": -160.0,
4490
+ "logps/rejected": -324.0,
4491
+ "loss": 0.0,
4492
+ "rewards/accuracies": 1.0,
4493
+ "rewards/chosen": 3.46875,
4494
+ "rewards/margins": 22.875,
4495
+ "rewards/rejected": -19.5,
4496
+ "step": 2960
4497
+ },
4498
+ {
4499
+ "epoch": 2.3798076923076925,
4500
+ "grad_norm": 7.072038044334629e-06,
4501
+ "learning_rate": 1.1487088156723062e-07,
4502
+ "logits/chosen": 0.97265625,
4503
+ "logits/rejected": 1.4609375,
4504
+ "logps/chosen": -148.0,
4505
+ "logps/rejected": -318.0,
4506
+ "loss": 0.0,
4507
+ "rewards/accuracies": 1.0,
4508
+ "rewards/chosen": 3.1875,
4509
+ "rewards/margins": 22.75,
4510
+ "rewards/rejected": -19.625,
4511
+ "step": 2970
4512
+ },
4513
+ {
4514
+ "epoch": 2.3878205128205128,
4515
+ "grad_norm": 1.434535069439472e-06,
4516
+ "learning_rate": 1.1338676165034135e-07,
4517
+ "logits/chosen": 0.87890625,
4518
+ "logits/rejected": 0.93359375,
4519
+ "logps/chosen": -147.0,
4520
+ "logps/rejected": -342.0,
4521
+ "loss": 0.0002,
4522
+ "rewards/accuracies": 1.0,
4523
+ "rewards/chosen": 3.203125,
4524
+ "rewards/margins": 22.75,
4525
+ "rewards/rejected": -19.5,
4526
+ "step": 2980
4527
+ },
4528
+ {
4529
+ "epoch": 2.3958333333333335,
4530
+ "grad_norm": 0.0003150427679969826,
4531
+ "learning_rate": 1.1190264173345205e-07,
4532
+ "logits/chosen": 0.78125,
4533
+ "logits/rejected": 1.0703125,
4534
+ "logps/chosen": -185.0,
4535
+ "logps/rejected": -330.0,
4536
+ "loss": 0.0,
4537
+ "rewards/accuracies": 1.0,
4538
+ "rewards/chosen": 3.375,
4539
+ "rewards/margins": 22.875,
4540
+ "rewards/rejected": -19.5,
4541
+ "step": 2990
4542
+ },
4543
+ {
4544
+ "epoch": 2.4038461538461537,
4545
+ "grad_norm": 7.914823589207877e-05,
4546
+ "learning_rate": 1.1041852181656278e-07,
4547
+ "logits/chosen": 0.9296875,
4548
+ "logits/rejected": 1.421875,
4549
+ "logps/chosen": -151.0,
4550
+ "logps/rejected": -342.0,
4551
+ "loss": 0.0,
4552
+ "rewards/accuracies": 1.0,
4553
+ "rewards/chosen": 3.703125,
4554
+ "rewards/margins": 23.375,
4555
+ "rewards/rejected": -19.75,
4556
+ "step": 3000
4557
+ },
4558
+ {
4559
+ "epoch": 2.4118589743589745,
4560
+ "grad_norm": 0.00026905024279143275,
4561
+ "learning_rate": 1.0893440189967348e-07,
4562
+ "logits/chosen": 0.76171875,
4563
+ "logits/rejected": 1.34375,
4564
+ "logps/chosen": -161.0,
4565
+ "logps/rejected": -354.0,
4566
+ "loss": 0.0,
4567
+ "rewards/accuracies": 1.0,
4568
+ "rewards/chosen": 3.40625,
4569
+ "rewards/margins": 24.5,
4570
+ "rewards/rejected": -21.125,
4571
+ "step": 3010
4572
+ },
4573
+ {
4574
+ "epoch": 2.4198717948717947,
4575
+ "grad_norm": 2.5621741265848455e-06,
4576
+ "learning_rate": 1.074502819827842e-07,
4577
+ "logits/chosen": 0.83203125,
4578
+ "logits/rejected": 1.3515625,
4579
+ "logps/chosen": -155.0,
4580
+ "logps/rejected": -352.0,
4581
+ "loss": 0.0,
4582
+ "rewards/accuracies": 1.0,
4583
+ "rewards/chosen": 3.46875,
4584
+ "rewards/margins": 23.25,
4585
+ "rewards/rejected": -19.875,
4586
+ "step": 3020
4587
+ },
4588
+ {
4589
+ "epoch": 2.4278846153846154,
4590
+ "grad_norm": 0.0006476017033990356,
4591
+ "learning_rate": 1.0596616206589493e-07,
4592
+ "logits/chosen": 0.83203125,
4593
+ "logits/rejected": 1.3046875,
4594
+ "logps/chosen": -176.0,
4595
+ "logps/rejected": -326.0,
4596
+ "loss": 0.0,
4597
+ "rewards/accuracies": 1.0,
4598
+ "rewards/chosen": 3.03125,
4599
+ "rewards/margins": 22.0,
4600
+ "rewards/rejected": -19.0,
4601
+ "step": 3030
4602
+ },
4603
+ {
4604
+ "epoch": 2.435897435897436,
4605
+ "grad_norm": 1.1941928602219868e-05,
4606
+ "learning_rate": 1.0448204214900563e-07,
4607
+ "logits/chosen": 1.1171875,
4608
+ "logits/rejected": 1.125,
4609
+ "logps/chosen": -167.0,
4610
+ "logps/rejected": -336.0,
4611
+ "loss": 0.0,
4612
+ "rewards/accuracies": 1.0,
4613
+ "rewards/chosen": 3.21875,
4614
+ "rewards/margins": 22.75,
4615
+ "rewards/rejected": -19.5,
4616
+ "step": 3040
4617
+ },
4618
+ {
4619
+ "epoch": 2.4439102564102564,
4620
+ "grad_norm": 4.511019063462089e-06,
4621
+ "learning_rate": 1.0299792223211636e-07,
4622
+ "logits/chosen": 0.96875,
4623
+ "logits/rejected": 1.2421875,
4624
+ "logps/chosen": -173.0,
4625
+ "logps/rejected": -338.0,
4626
+ "loss": 0.0,
4627
+ "rewards/accuracies": 1.0,
4628
+ "rewards/chosen": 3.421875,
4629
+ "rewards/margins": 22.75,
4630
+ "rewards/rejected": -19.375,
4631
+ "step": 3050
4632
+ },
4633
+ {
4634
+ "epoch": 2.451923076923077,
4635
+ "grad_norm": 1.1909188473574983e-06,
4636
+ "learning_rate": 1.0151380231522706e-07,
4637
+ "logits/chosen": 0.71875,
4638
+ "logits/rejected": 1.1953125,
4639
+ "logps/chosen": -189.0,
4640
+ "logps/rejected": -338.0,
4641
+ "loss": 0.0,
4642
+ "rewards/accuracies": 1.0,
4643
+ "rewards/chosen": 3.703125,
4644
+ "rewards/margins": 23.875,
4645
+ "rewards/rejected": -20.125,
4646
+ "step": 3060
4647
+ },
4648
+ {
4649
+ "epoch": 2.4599358974358974,
4650
+ "grad_norm": 0.00016413461567918458,
4651
+ "learning_rate": 1.0002968239833778e-07,
4652
+ "logits/chosen": 0.8515625,
4653
+ "logits/rejected": 1.140625,
4654
+ "logps/chosen": -176.0,
4655
+ "logps/rejected": -360.0,
4656
+ "loss": 0.0,
4657
+ "rewards/accuracies": 1.0,
4658
+ "rewards/chosen": 3.046875,
4659
+ "rewards/margins": 24.0,
4660
+ "rewards/rejected": -21.0,
4661
+ "step": 3070
4662
+ },
4663
+ {
4664
+ "epoch": 2.467948717948718,
4665
+ "grad_norm": 0.0004207874699143046,
4666
+ "learning_rate": 9.854556248144849e-08,
4667
+ "logits/chosen": 1.0625,
4668
+ "logits/rejected": 1.0703125,
4669
+ "logps/chosen": -171.0,
4670
+ "logps/rejected": -336.0,
4671
+ "loss": 0.0,
4672
+ "rewards/accuracies": 1.0,
4673
+ "rewards/chosen": 3.265625,
4674
+ "rewards/margins": 23.0,
4675
+ "rewards/rejected": -19.75,
4676
+ "step": 3080
4677
+ },
4678
+ {
4679
+ "epoch": 2.4759615384615383,
4680
+ "grad_norm": 0.0013603810220500937,
4681
+ "learning_rate": 9.706144256455921e-08,
4682
+ "logits/chosen": 0.859375,
4683
+ "logits/rejected": 1.4453125,
4684
+ "logps/chosen": -201.0,
4685
+ "logps/rejected": -330.0,
4686
+ "loss": 0.0,
4687
+ "rewards/accuracies": 1.0,
4688
+ "rewards/chosen": 3.75,
4689
+ "rewards/margins": 23.0,
4690
+ "rewards/rejected": -19.25,
4691
+ "step": 3090
4692
+ },
4693
+ {
4694
+ "epoch": 2.483974358974359,
4695
+ "grad_norm": 1.8119361476861766e-07,
4696
+ "learning_rate": 9.557732264766994e-08,
4697
+ "logits/chosen": 1.125,
4698
+ "logits/rejected": 1.65625,
4699
+ "logps/chosen": -181.0,
4700
+ "logps/rejected": -320.0,
4701
+ "loss": 0.0,
4702
+ "rewards/accuracies": 1.0,
4703
+ "rewards/chosen": 2.859375,
4704
+ "rewards/margins": 21.875,
4705
+ "rewards/rejected": -19.0,
4706
+ "step": 3100
4707
+ },
4708
+ {
4709
+ "epoch": 2.4919871794871793,
4710
+ "grad_norm": 1.0849727745118161e-05,
4711
+ "learning_rate": 9.409320273078064e-08,
4712
+ "logits/chosen": 0.75,
4713
+ "logits/rejected": 1.25,
4714
+ "logps/chosen": -194.0,
4715
+ "logps/rejected": -346.0,
4716
+ "loss": 0.0001,
4717
+ "rewards/accuracies": 1.0,
4718
+ "rewards/chosen": 3.65625,
4719
+ "rewards/margins": 25.125,
4720
+ "rewards/rejected": -21.375,
4721
+ "step": 3110
4722
+ },
4723
+ {
4724
+ "epoch": 2.5,
4725
+ "grad_norm": 0.00190454125856642,
4726
+ "learning_rate": 9.260908281389137e-08,
4727
+ "logits/chosen": 0.79296875,
4728
+ "logits/rejected": 1.1328125,
4729
+ "logps/chosen": -174.0,
4730
+ "logps/rejected": -318.0,
4731
+ "loss": 0.0,
4732
+ "rewards/accuracies": 1.0,
4733
+ "rewards/chosen": 3.671875,
4734
+ "rewards/margins": 23.0,
4735
+ "rewards/rejected": -19.25,
4736
+ "step": 3120
4737
+ },
4738
+ {
4739
+ "epoch": 2.5080128205128203,
4740
+ "grad_norm": 4.817138155607492e-05,
4741
+ "learning_rate": 9.112496289700207e-08,
4742
+ "logits/chosen": 1.078125,
4743
+ "logits/rejected": 1.0546875,
4744
+ "logps/chosen": -151.0,
4745
+ "logps/rejected": -336.0,
4746
+ "loss": 0.0,
4747
+ "rewards/accuracies": 1.0,
4748
+ "rewards/chosen": 3.6875,
4749
+ "rewards/margins": 23.5,
4750
+ "rewards/rejected": -19.875,
4751
+ "step": 3130
4752
+ },
4753
+ {
4754
+ "epoch": 2.516025641025641,
4755
+ "grad_norm": 4.1634510560200736e-07,
4756
+ "learning_rate": 8.964084298011279e-08,
4757
+ "logits/chosen": 0.89453125,
4758
+ "logits/rejected": 1.21875,
4759
+ "logps/chosen": -165.0,
4760
+ "logps/rejected": -350.0,
4761
+ "loss": 0.0,
4762
+ "rewards/accuracies": 1.0,
4763
+ "rewards/chosen": 3.03125,
4764
+ "rewards/margins": 23.625,
4765
+ "rewards/rejected": -20.625,
4766
+ "step": 3140
4767
+ },
4768
+ {
4769
+ "epoch": 2.5240384615384617,
4770
+ "grad_norm": 0.0030510730836868943,
4771
+ "learning_rate": 8.815672306322349e-08,
4772
+ "logits/chosen": 0.83984375,
4773
+ "logits/rejected": 1.3359375,
4774
+ "logps/chosen": -150.0,
4775
+ "logps/rejected": -338.0,
4776
+ "loss": 0.0,
4777
+ "rewards/accuracies": 1.0,
4778
+ "rewards/chosen": 3.09375,
4779
+ "rewards/margins": 23.5,
4780
+ "rewards/rejected": -20.375,
4781
+ "step": 3150
4782
+ },
4783
+ {
4784
+ "epoch": 2.532051282051282,
4785
+ "grad_norm": 4.333397926948427e-05,
4786
+ "learning_rate": 8.667260314633422e-08,
4787
+ "logits/chosen": 0.8203125,
4788
+ "logits/rejected": 1.328125,
4789
+ "logps/chosen": -144.0,
4790
+ "logps/rejected": -350.0,
4791
+ "loss": 0.0,
4792
+ "rewards/accuracies": 1.0,
4793
+ "rewards/chosen": 3.65625,
4794
+ "rewards/margins": 24.625,
4795
+ "rewards/rejected": -21.0,
4796
+ "step": 3160
4797
+ },
4798
+ {
4799
+ "epoch": 2.5400641025641026,
4800
+ "grad_norm": 3.816937452899327e-05,
4801
+ "learning_rate": 8.518848322944495e-08,
4802
+ "logits/chosen": 0.83203125,
4803
+ "logits/rejected": 1.1953125,
4804
+ "logps/chosen": -184.0,
4805
+ "logps/rejected": -330.0,
4806
+ "loss": 0.0,
4807
+ "rewards/accuracies": 1.0,
4808
+ "rewards/chosen": 3.515625,
4809
+ "rewards/margins": 23.25,
4810
+ "rewards/rejected": -19.75,
4811
+ "step": 3170
4812
+ },
4813
+ {
4814
+ "epoch": 2.5480769230769234,
4815
+ "grad_norm": 4.438174995412161e-06,
4816
+ "learning_rate": 8.370436331255565e-08,
4817
+ "logits/chosen": 0.9921875,
4818
+ "logits/rejected": 1.234375,
4819
+ "logps/chosen": -137.0,
4820
+ "logps/rejected": -334.0,
4821
+ "loss": 0.0,
4822
+ "rewards/accuracies": 1.0,
4823
+ "rewards/chosen": 3.546875,
4824
+ "rewards/margins": 23.75,
4825
+ "rewards/rejected": -20.25,
4826
+ "step": 3180
4827
+ },
4828
+ {
4829
+ "epoch": 2.5560897435897436,
4830
+ "grad_norm": 1.2674210936494033e-05,
4831
+ "learning_rate": 8.222024339566637e-08,
4832
+ "logits/chosen": 0.9765625,
4833
+ "logits/rejected": 1.1953125,
4834
+ "logps/chosen": -189.0,
4835
+ "logps/rejected": -338.0,
4836
+ "loss": 0.0,
4837
+ "rewards/accuracies": 1.0,
4838
+ "rewards/chosen": 3.828125,
4839
+ "rewards/margins": 22.625,
4840
+ "rewards/rejected": -18.75,
4841
+ "step": 3190
4842
+ },
4843
+ {
4844
+ "epoch": 2.564102564102564,
4845
+ "grad_norm": 2.2470915894489486e-06,
4846
+ "learning_rate": 8.073612347877707e-08,
4847
+ "logits/chosen": 0.7578125,
4848
+ "logits/rejected": 0.94140625,
4849
+ "logps/chosen": -210.0,
4850
+ "logps/rejected": -336.0,
4851
+ "loss": 0.0,
4852
+ "rewards/accuracies": 1.0,
4853
+ "rewards/chosen": 2.8125,
4854
+ "rewards/margins": 22.875,
4855
+ "rewards/rejected": -20.0,
4856
+ "step": 3200
4857
+ },
4858
+ {
4859
+ "epoch": 2.5721153846153846,
4860
+ "grad_norm": 0.0013392786326408394,
4861
+ "learning_rate": 7.92520035618878e-08,
4862
+ "logits/chosen": 0.70703125,
4863
+ "logits/rejected": 1.0625,
4864
+ "logps/chosen": -166.0,
4865
+ "logps/rejected": -330.0,
4866
+ "loss": 0.0,
4867
+ "rewards/accuracies": 1.0,
4868
+ "rewards/chosen": 3.28125,
4869
+ "rewards/margins": 23.125,
4870
+ "rewards/rejected": -19.875,
4871
+ "step": 3210
4872
+ },
4873
+ {
4874
+ "epoch": 2.5801282051282053,
4875
+ "grad_norm": 1.1479055755141908e-06,
4876
+ "learning_rate": 7.776788364499851e-08,
4877
+ "logits/chosen": 0.392578125,
4878
+ "logits/rejected": 1.03125,
4879
+ "logps/chosen": -176.0,
4880
+ "logps/rejected": -342.0,
4881
+ "loss": 0.0,
4882
+ "rewards/accuracies": 1.0,
4883
+ "rewards/chosen": 3.3125,
4884
+ "rewards/margins": 23.75,
4885
+ "rewards/rejected": -20.5,
4886
+ "step": 3220
4887
+ },
4888
+ {
4889
+ "epoch": 2.5881410256410255,
4890
+ "grad_norm": 6.57721816711125e-08,
4891
+ "learning_rate": 7.628376372810923e-08,
4892
+ "logits/chosen": 0.859375,
4893
+ "logits/rejected": 0.92578125,
4894
+ "logps/chosen": -161.0,
4895
+ "logps/rejected": -348.0,
4896
+ "loss": 0.0,
4897
+ "rewards/accuracies": 1.0,
4898
+ "rewards/chosen": 3.3125,
4899
+ "rewards/margins": 25.5,
4900
+ "rewards/rejected": -22.25,
4901
+ "step": 3230
4902
+ },
4903
+ {
4904
+ "epoch": 2.5961538461538463,
4905
+ "grad_norm": 8.654157902362506e-07,
4906
+ "learning_rate": 7.479964381121995e-08,
4907
+ "logits/chosen": 0.84375,
4908
+ "logits/rejected": 1.34375,
4909
+ "logps/chosen": -131.0,
4910
+ "logps/rejected": -296.0,
4911
+ "loss": 0.0,
4912
+ "rewards/accuracies": 1.0,
4913
+ "rewards/chosen": 3.34375,
4914
+ "rewards/margins": 21.125,
4915
+ "rewards/rejected": -17.75,
4916
+ "step": 3240
4917
+ },
4918
+ {
4919
+ "epoch": 2.6041666666666665,
4920
+ "grad_norm": 2.107173176911769e-05,
4921
+ "learning_rate": 7.331552389433065e-08,
4922
+ "logits/chosen": 0.92578125,
4923
+ "logits/rejected": 1.2421875,
4924
+ "logps/chosen": -182.0,
4925
+ "logps/rejected": -332.0,
4926
+ "loss": 0.0,
4927
+ "rewards/accuracies": 1.0,
4928
+ "rewards/chosen": 3.75,
4929
+ "rewards/margins": 22.625,
4930
+ "rewards/rejected": -18.875,
4931
+ "step": 3250
4932
+ },
4933
+ {
4934
+ "epoch": 2.6121794871794872,
4935
+ "grad_norm": 3.0995378385019935e-05,
4936
+ "learning_rate": 7.183140397744138e-08,
4937
+ "logits/chosen": 0.78515625,
4938
+ "logits/rejected": 1.1484375,
4939
+ "logps/chosen": -185.0,
4940
+ "logps/rejected": -330.0,
4941
+ "loss": 0.0,
4942
+ "rewards/accuracies": 1.0,
4943
+ "rewards/chosen": 3.53125,
4944
+ "rewards/margins": 23.375,
4945
+ "rewards/rejected": -19.75,
4946
+ "step": 3260
4947
+ },
4948
+ {
4949
+ "epoch": 2.6201923076923075,
4950
+ "grad_norm": 7.163491430995317e-06,
4951
+ "learning_rate": 7.034728406055208e-08,
4952
+ "logits/chosen": 1.0390625,
4953
+ "logits/rejected": 1.03125,
4954
+ "logps/chosen": -163.0,
4955
+ "logps/rejected": -336.0,
4956
+ "loss": 0.0,
4957
+ "rewards/accuracies": 1.0,
4958
+ "rewards/chosen": 3.5,
4959
+ "rewards/margins": 23.625,
4960
+ "rewards/rejected": -20.125,
4961
+ "step": 3270
4962
+ },
4963
+ {
4964
+ "epoch": 2.628205128205128,
4965
+ "grad_norm": 3.2825173935148675e-06,
4966
+ "learning_rate": 6.886316414366281e-08,
4967
+ "logits/chosen": 0.9921875,
4968
+ "logits/rejected": 1.1875,
4969
+ "logps/chosen": -167.0,
4970
+ "logps/rejected": -346.0,
4971
+ "loss": 0.0,
4972
+ "rewards/accuracies": 1.0,
4973
+ "rewards/chosen": 3.578125,
4974
+ "rewards/margins": 23.875,
4975
+ "rewards/rejected": -20.375,
4976
+ "step": 3280
4977
+ },
4978
+ {
4979
+ "epoch": 2.636217948717949,
4980
+ "grad_norm": 3.7018247685930666e-06,
4981
+ "learning_rate": 6.737904422677352e-08,
4982
+ "logits/chosen": 0.64453125,
4983
+ "logits/rejected": 1.125,
4984
+ "logps/chosen": -154.0,
4985
+ "logps/rejected": -340.0,
4986
+ "loss": 0.0,
4987
+ "rewards/accuracies": 1.0,
4988
+ "rewards/chosen": 3.28125,
4989
+ "rewards/margins": 25.0,
4990
+ "rewards/rejected": -21.625,
4991
+ "step": 3290
4992
+ },
4993
+ {
4994
+ "epoch": 2.644230769230769,
4995
+ "grad_norm": 1.5235500032868652e-06,
4996
+ "learning_rate": 6.589492430988424e-08,
4997
+ "logits/chosen": 0.86328125,
4998
+ "logits/rejected": 1.171875,
4999
+ "logps/chosen": -158.0,
5000
+ "logps/rejected": -326.0,
5001
+ "loss": 0.0,
5002
+ "rewards/accuracies": 1.0,
5003
+ "rewards/chosen": 4.0625,
5004
+ "rewards/margins": 23.0,
5005
+ "rewards/rejected": -19.0,
5006
+ "step": 3300
5007
+ },
5008
+ {
5009
+ "epoch": 2.65224358974359,
5010
+ "grad_norm": 2.4028100208495416e-05,
5011
+ "learning_rate": 6.441080439299495e-08,
5012
+ "logits/chosen": 0.9453125,
5013
+ "logits/rejected": 1.203125,
5014
+ "logps/chosen": -174.0,
5015
+ "logps/rejected": -330.0,
5016
+ "loss": 0.0,
5017
+ "rewards/accuracies": 1.0,
5018
+ "rewards/chosen": 3.484375,
5019
+ "rewards/margins": 23.375,
5020
+ "rewards/rejected": -19.875,
5021
+ "step": 3310
5022
+ },
5023
+ {
5024
+ "epoch": 2.66025641025641,
5025
+ "grad_norm": 0.0006641311306163846,
5026
+ "learning_rate": 6.292668447610566e-08,
5027
+ "logits/chosen": 1.03125,
5028
+ "logits/rejected": 1.375,
5029
+ "logps/chosen": -154.0,
5030
+ "logps/rejected": -348.0,
5031
+ "loss": 0.0002,
5032
+ "rewards/accuracies": 1.0,
5033
+ "rewards/chosen": 3.671875,
5034
+ "rewards/margins": 24.25,
5035
+ "rewards/rejected": -20.5,
5036
+ "step": 3320
5037
+ },
5038
+ {
5039
+ "epoch": 2.668269230769231,
5040
+ "grad_norm": 1.3261677467269389e-05,
5041
+ "learning_rate": 6.144256455921639e-08,
5042
+ "logits/chosen": 1.171875,
5043
+ "logits/rejected": 1.671875,
5044
+ "logps/chosen": -136.0,
5045
+ "logps/rejected": -342.0,
5046
+ "loss": 0.0,
5047
+ "rewards/accuracies": 1.0,
5048
+ "rewards/chosen": 3.6875,
5049
+ "rewards/margins": 24.0,
5050
+ "rewards/rejected": -20.375,
5051
+ "step": 3330
5052
+ },
5053
+ {
5054
+ "epoch": 2.676282051282051,
5055
+ "grad_norm": 2.3998995719683365e-07,
5056
+ "learning_rate": 5.99584446423271e-08,
5057
+ "logits/chosen": 0.890625,
5058
+ "logits/rejected": 1.2890625,
5059
+ "logps/chosen": -212.0,
5060
+ "logps/rejected": -320.0,
5061
+ "loss": 0.0,
5062
+ "rewards/accuracies": 1.0,
5063
+ "rewards/chosen": 3.40625,
5064
+ "rewards/margins": 22.875,
5065
+ "rewards/rejected": -19.5,
5066
+ "step": 3340
5067
+ },
5068
+ {
5069
+ "epoch": 2.684294871794872,
5070
+ "grad_norm": 1.1770292696948012e-07,
5071
+ "learning_rate": 5.847432472543781e-08,
5072
+ "logits/chosen": 0.83984375,
5073
+ "logits/rejected": 1.6484375,
5074
+ "logps/chosen": -168.0,
5075
+ "logps/rejected": -342.0,
5076
+ "loss": 0.0,
5077
+ "rewards/accuracies": 1.0,
5078
+ "rewards/chosen": 3.484375,
5079
+ "rewards/margins": 24.5,
5080
+ "rewards/rejected": -21.0,
5081
+ "step": 3350
5082
+ },
5083
+ {
5084
+ "epoch": 2.6923076923076925,
5085
+ "grad_norm": 2.900580842341117e-07,
5086
+ "learning_rate": 5.699020480854853e-08,
5087
+ "logits/chosen": 0.91796875,
5088
+ "logits/rejected": 1.09375,
5089
+ "logps/chosen": -142.0,
5090
+ "logps/rejected": -352.0,
5091
+ "loss": 0.0,
5092
+ "rewards/accuracies": 1.0,
5093
+ "rewards/chosen": 3.53125,
5094
+ "rewards/margins": 25.625,
5095
+ "rewards/rejected": -22.125,
5096
+ "step": 3360
5097
+ },
5098
+ {
5099
+ "epoch": 2.7003205128205128,
5100
+ "grad_norm": 1.4745279648366565e-05,
5101
+ "learning_rate": 5.550608489165924e-08,
5102
+ "logits/chosen": 0.84375,
5103
+ "logits/rejected": 1.09375,
5104
+ "logps/chosen": -150.0,
5105
+ "logps/rejected": -324.0,
5106
+ "loss": 0.0,
5107
+ "rewards/accuracies": 1.0,
5108
+ "rewards/chosen": 3.578125,
5109
+ "rewards/margins": 23.25,
5110
+ "rewards/rejected": -19.75,
5111
+ "step": 3370
5112
+ },
5113
+ {
5114
+ "epoch": 2.7083333333333335,
5115
+ "grad_norm": 1.4514272796826e-06,
5116
+ "learning_rate": 5.4021964974769963e-08,
5117
+ "logits/chosen": 0.92578125,
5118
+ "logits/rejected": 1.359375,
5119
+ "logps/chosen": -163.0,
5120
+ "logps/rejected": -332.0,
5121
+ "loss": 0.0,
5122
+ "rewards/accuracies": 1.0,
5123
+ "rewards/chosen": 3.375,
5124
+ "rewards/margins": 23.0,
5125
+ "rewards/rejected": -19.625,
5126
+ "step": 3380
5127
+ },
5128
+ {
5129
+ "epoch": 2.7163461538461537,
5130
+ "grad_norm": 0.0005888690086543229,
5131
+ "learning_rate": 5.253784505788068e-08,
5132
+ "logits/chosen": 1.03125,
5133
+ "logits/rejected": 1.0234375,
5134
+ "logps/chosen": -155.0,
5135
+ "logps/rejected": -328.0,
5136
+ "loss": 0.0,
5137
+ "rewards/accuracies": 1.0,
5138
+ "rewards/chosen": 3.125,
5139
+ "rewards/margins": 22.625,
5140
+ "rewards/rejected": -19.5,
5141
+ "step": 3390
5142
+ },
5143
+ {
5144
+ "epoch": 2.7243589743589745,
5145
+ "grad_norm": 2.0042622885174553e-05,
5146
+ "learning_rate": 5.105372514099139e-08,
5147
+ "logits/chosen": 0.87890625,
5148
+ "logits/rejected": 1.390625,
5149
+ "logps/chosen": -144.0,
5150
+ "logps/rejected": -336.0,
5151
+ "loss": 0.0,
5152
+ "rewards/accuracies": 1.0,
5153
+ "rewards/chosen": 3.6875,
5154
+ "rewards/margins": 23.5,
5155
+ "rewards/rejected": -19.875,
5156
+ "step": 3400
5157
+ },
5158
+ {
5159
+ "epoch": 2.7323717948717947,
5160
+ "grad_norm": 5.635967172397906e-05,
5161
+ "learning_rate": 4.9569605224102104e-08,
5162
+ "logits/chosen": 0.99609375,
5163
+ "logits/rejected": 1.4375,
5164
+ "logps/chosen": -171.0,
5165
+ "logps/rejected": -336.0,
5166
+ "loss": 0.0,
5167
+ "rewards/accuracies": 1.0,
5168
+ "rewards/chosen": 3.546875,
5169
+ "rewards/margins": 24.125,
5170
+ "rewards/rejected": -20.625,
5171
+ "step": 3410
5172
+ },
5173
+ {
5174
+ "epoch": 2.7403846153846154,
5175
+ "grad_norm": 6.551150056642263e-06,
5176
+ "learning_rate": 4.808548530721282e-08,
5177
+ "logits/chosen": 1.0078125,
5178
+ "logits/rejected": 1.359375,
5179
+ "logps/chosen": -172.0,
5180
+ "logps/rejected": -350.0,
5181
+ "loss": 0.0,
5182
+ "rewards/accuracies": 1.0,
5183
+ "rewards/chosen": 3.609375,
5184
+ "rewards/margins": 24.5,
5185
+ "rewards/rejected": -20.875,
5186
+ "step": 3420
5187
+ },
5188
+ {
5189
+ "epoch": 2.748397435897436,
5190
+ "grad_norm": 2.170737569638303e-05,
5191
+ "learning_rate": 4.660136539032353e-08,
5192
+ "logits/chosen": 1.09375,
5193
+ "logits/rejected": 1.3203125,
5194
+ "logps/chosen": -170.0,
5195
+ "logps/rejected": -348.0,
5196
+ "loss": 0.0,
5197
+ "rewards/accuracies": 1.0,
5198
+ "rewards/chosen": 3.40625,
5199
+ "rewards/margins": 24.75,
5200
+ "rewards/rejected": -21.375,
5201
+ "step": 3430
5202
+ },
5203
+ {
5204
+ "epoch": 2.7564102564102564,
5205
+ "grad_norm": 3.662218737713611e-05,
5206
+ "learning_rate": 4.511724547343425e-08,
5207
+ "logits/chosen": 0.734375,
5208
+ "logits/rejected": 1.3671875,
5209
+ "logps/chosen": -139.0,
5210
+ "logps/rejected": -334.0,
5211
+ "loss": 0.0,
5212
+ "rewards/accuracies": 1.0,
5213
+ "rewards/chosen": 3.359375,
5214
+ "rewards/margins": 23.5,
5215
+ "rewards/rejected": -20.125,
5216
+ "step": 3440
5217
+ },
5218
+ {
5219
+ "epoch": 2.7644230769230766,
5220
+ "grad_norm": 2.0092823057660767e-07,
5221
+ "learning_rate": 4.363312555654497e-08,
5222
+ "logits/chosen": 1.0078125,
5223
+ "logits/rejected": 1.1640625,
5224
+ "logps/chosen": -178.0,
5225
+ "logps/rejected": -334.0,
5226
+ "loss": 0.0,
5227
+ "rewards/accuracies": 1.0,
5228
+ "rewards/chosen": 3.53125,
5229
+ "rewards/margins": 22.5,
5230
+ "rewards/rejected": -19.0,
5231
+ "step": 3450
5232
+ },
5233
+ {
5234
+ "epoch": 2.7724358974358974,
5235
+ "grad_norm": 8.047688007594987e-05,
5236
+ "learning_rate": 4.2149005639655685e-08,
5237
+ "logits/chosen": 0.8046875,
5238
+ "logits/rejected": 1.2578125,
5239
+ "logps/chosen": -163.0,
5240
+ "logps/rejected": -354.0,
5241
+ "loss": 0.0,
5242
+ "rewards/accuracies": 1.0,
5243
+ "rewards/chosen": 3.46875,
5244
+ "rewards/margins": 24.75,
5245
+ "rewards/rejected": -21.25,
5246
+ "step": 3460
5247
+ },
5248
+ {
5249
+ "epoch": 2.780448717948718,
5250
+ "grad_norm": 0.0006711480616614469,
5251
+ "learning_rate": 4.06648857227664e-08,
5252
+ "logits/chosen": 0.76953125,
5253
+ "logits/rejected": 1.40625,
5254
+ "logps/chosen": -167.0,
5255
+ "logps/rejected": -330.0,
5256
+ "loss": 0.0,
5257
+ "rewards/accuracies": 1.0,
5258
+ "rewards/chosen": 3.421875,
5259
+ "rewards/margins": 23.125,
5260
+ "rewards/rejected": -19.75,
5261
+ "step": 3470
5262
+ },
5263
+ {
5264
+ "epoch": 2.7884615384615383,
5265
+ "grad_norm": 0.0003280497102533483,
5266
+ "learning_rate": 3.918076580587711e-08,
5267
+ "logits/chosen": 0.6875,
5268
+ "logits/rejected": 1.1796875,
5269
+ "logps/chosen": -174.0,
5270
+ "logps/rejected": -350.0,
5271
+ "loss": 0.0,
5272
+ "rewards/accuracies": 1.0,
5273
+ "rewards/chosen": 3.265625,
5274
+ "rewards/margins": 23.125,
5275
+ "rewards/rejected": -19.875,
5276
+ "step": 3480
5277
+ },
5278
+ {
5279
+ "epoch": 2.796474358974359,
5280
+ "grad_norm": 6.798795586197906e-05,
5281
+ "learning_rate": 3.7696645888987825e-08,
5282
+ "logits/chosen": 0.87109375,
5283
+ "logits/rejected": 0.99609375,
5284
+ "logps/chosen": -147.0,
5285
+ "logps/rejected": -356.0,
5286
+ "loss": 0.0,
5287
+ "rewards/accuracies": 1.0,
5288
+ "rewards/chosen": 3.265625,
5289
+ "rewards/margins": 24.625,
5290
+ "rewards/rejected": -21.375,
5291
+ "step": 3490
5292
+ },
5293
+ {
5294
+ "epoch": 2.8044871794871797,
5295
+ "grad_norm": 4.364786537805849e-06,
5296
+ "learning_rate": 3.621252597209854e-08,
5297
+ "logits/chosen": 0.91796875,
5298
+ "logits/rejected": 1.3984375,
5299
+ "logps/chosen": -159.0,
5300
+ "logps/rejected": -338.0,
5301
+ "loss": 0.0,
5302
+ "rewards/accuracies": 1.0,
5303
+ "rewards/chosen": 3.375,
5304
+ "rewards/margins": 23.625,
5305
+ "rewards/rejected": -20.25,
5306
+ "step": 3500
5307
+ },
5308
+ {
5309
+ "epoch": 2.8125,
5310
+ "grad_norm": 1.9387537093442454e-08,
5311
+ "learning_rate": 3.4728406055209265e-08,
5312
+ "logits/chosen": 0.75390625,
5313
+ "logits/rejected": 1.1171875,
5314
+ "logps/chosen": -185.0,
5315
+ "logps/rejected": -348.0,
5316
+ "loss": 0.0,
5317
+ "rewards/accuracies": 1.0,
5318
+ "rewards/chosen": 3.578125,
5319
+ "rewards/margins": 25.125,
5320
+ "rewards/rejected": -21.5,
5321
+ "step": 3510
5322
+ },
5323
+ {
5324
+ "epoch": 2.8205128205128203,
5325
+ "grad_norm": 1.3880341861407807e-05,
5326
+ "learning_rate": 3.324428613831998e-08,
5327
+ "logits/chosen": 1.015625,
5328
+ "logits/rejected": 1.296875,
5329
+ "logps/chosen": -143.0,
5330
+ "logps/rejected": -358.0,
5331
+ "loss": 0.0,
5332
+ "rewards/accuracies": 1.0,
5333
+ "rewards/chosen": 3.53125,
5334
+ "rewards/margins": 24.75,
5335
+ "rewards/rejected": -21.25,
5336
+ "step": 3520
5337
+ },
5338
+ {
5339
+ "epoch": 2.828525641025641,
5340
+ "grad_norm": 1.8304365557650966e-05,
5341
+ "learning_rate": 3.176016622143069e-08,
5342
+ "logits/chosen": 1.046875,
5343
+ "logits/rejected": 1.203125,
5344
+ "logps/chosen": -167.0,
5345
+ "logps/rejected": -340.0,
5346
+ "loss": 0.0,
5347
+ "rewards/accuracies": 1.0,
5348
+ "rewards/chosen": 3.78125,
5349
+ "rewards/margins": 24.25,
5350
+ "rewards/rejected": -20.5,
5351
+ "step": 3530
5352
+ },
5353
+ {
5354
+ "epoch": 2.8365384615384617,
5355
+ "grad_norm": 5.07839088524003e-07,
5356
+ "learning_rate": 3.0276046304541406e-08,
5357
+ "logits/chosen": 0.94140625,
5358
+ "logits/rejected": 1.203125,
5359
+ "logps/chosen": -170.0,
5360
+ "logps/rejected": -344.0,
5361
+ "loss": 0.0,
5362
+ "rewards/accuracies": 1.0,
5363
+ "rewards/chosen": 3.140625,
5364
+ "rewards/margins": 23.75,
5365
+ "rewards/rejected": -20.625,
5366
+ "step": 3540
5367
+ },
5368
+ {
5369
+ "epoch": 2.844551282051282,
5370
+ "grad_norm": 6.497502370566641e-06,
5371
+ "learning_rate": 2.879192638765212e-08,
5372
+ "logits/chosen": 0.7578125,
5373
+ "logits/rejected": 1.09375,
5374
+ "logps/chosen": -180.0,
5375
+ "logps/rejected": -320.0,
5376
+ "loss": 0.0,
5377
+ "rewards/accuracies": 1.0,
5378
+ "rewards/chosen": 3.796875,
5379
+ "rewards/margins": 22.125,
5380
+ "rewards/rejected": -18.375,
5381
+ "step": 3550
5382
+ },
5383
+ {
5384
+ "epoch": 2.8525641025641026,
5385
+ "grad_norm": 2.0919183207981193e-05,
5386
+ "learning_rate": 2.7307806470762836e-08,
5387
+ "logits/chosen": 0.93359375,
5388
+ "logits/rejected": 1.4609375,
5389
+ "logps/chosen": -171.0,
5390
+ "logps/rejected": -334.0,
5391
+ "loss": 0.0,
5392
+ "rewards/accuracies": 1.0,
5393
+ "rewards/chosen": 3.65625,
5394
+ "rewards/margins": 23.25,
5395
+ "rewards/rejected": -19.5,
5396
+ "step": 3560
5397
+ },
5398
+ {
5399
+ "epoch": 2.8605769230769234,
5400
+ "grad_norm": 5.495725650489294e-08,
5401
+ "learning_rate": 2.5823686553873553e-08,
5402
+ "logits/chosen": 0.8359375,
5403
+ "logits/rejected": 1.140625,
5404
+ "logps/chosen": -166.0,
5405
+ "logps/rejected": -366.0,
5406
+ "loss": 0.0,
5407
+ "rewards/accuracies": 1.0,
5408
+ "rewards/chosen": 2.984375,
5409
+ "rewards/margins": 25.125,
5410
+ "rewards/rejected": -22.125,
5411
+ "step": 3570
5412
+ },
5413
+ {
5414
+ "epoch": 2.8685897435897436,
5415
+ "grad_norm": 0.00027587029209878266,
5416
+ "learning_rate": 2.4339566636984267e-08,
5417
+ "logits/chosen": 0.92578125,
5418
+ "logits/rejected": 1.46875,
5419
+ "logps/chosen": -178.0,
5420
+ "logps/rejected": -314.0,
5421
+ "loss": 0.0,
5422
+ "rewards/accuracies": 1.0,
5423
+ "rewards/chosen": 3.78125,
5424
+ "rewards/margins": 21.75,
5425
+ "rewards/rejected": -18.0,
5426
+ "step": 3580
5427
+ },
5428
+ {
5429
+ "epoch": 2.876602564102564,
5430
+ "grad_norm": 3.145906163646421e-06,
5431
+ "learning_rate": 2.2855446720094983e-08,
5432
+ "logits/chosen": 0.93359375,
5433
+ "logits/rejected": 1.2109375,
5434
+ "logps/chosen": -168.0,
5435
+ "logps/rejected": -356.0,
5436
+ "loss": 0.0,
5437
+ "rewards/accuracies": 1.0,
5438
+ "rewards/chosen": 3.390625,
5439
+ "rewards/margins": 24.875,
5440
+ "rewards/rejected": -21.5,
5441
+ "step": 3590
5442
+ },
5443
+ {
5444
+ "epoch": 2.8846153846153846,
5445
+ "grad_norm": 0.002758379167699333,
5446
+ "learning_rate": 2.1371326803205697e-08,
5447
+ "logits/chosen": 0.92578125,
5448
+ "logits/rejected": 1.46875,
5449
+ "logps/chosen": -172.0,
5450
+ "logps/rejected": -328.0,
5451
+ "loss": 0.0,
5452
+ "rewards/accuracies": 1.0,
5453
+ "rewards/chosen": 3.875,
5454
+ "rewards/margins": 23.25,
5455
+ "rewards/rejected": -19.375,
5456
+ "step": 3600
5457
+ },
5458
+ {
5459
+ "epoch": 2.8926282051282053,
5460
+ "grad_norm": 3.8764981035982807e-07,
5461
+ "learning_rate": 1.9887206886316414e-08,
5462
+ "logits/chosen": 0.8046875,
5463
+ "logits/rejected": 1.4375,
5464
+ "logps/chosen": -166.0,
5465
+ "logps/rejected": -364.0,
5466
+ "loss": 0.0,
5467
+ "rewards/accuracies": 1.0,
5468
+ "rewards/chosen": 2.875,
5469
+ "rewards/margins": 25.25,
5470
+ "rewards/rejected": -22.5,
5471
+ "step": 3610
5472
+ },
5473
+ {
5474
+ "epoch": 2.9006410256410255,
5475
+ "grad_norm": 2.459298995862145e-05,
5476
+ "learning_rate": 1.840308696942713e-08,
5477
+ "logits/chosen": 0.9296875,
5478
+ "logits/rejected": 1.2109375,
5479
+ "logps/chosen": -170.0,
5480
+ "logps/rejected": -346.0,
5481
+ "loss": 0.0,
5482
+ "rewards/accuracies": 1.0,
5483
+ "rewards/chosen": 3.390625,
5484
+ "rewards/margins": 24.0,
5485
+ "rewards/rejected": -20.5,
5486
+ "step": 3620
5487
+ },
5488
+ {
5489
+ "epoch": 2.9086538461538463,
5490
+ "grad_norm": 0.0033663621977632068,
5491
+ "learning_rate": 1.6918967052537844e-08,
5492
+ "logits/chosen": 0.90234375,
5493
+ "logits/rejected": 1.1015625,
5494
+ "logps/chosen": -158.0,
5495
+ "logps/rejected": -342.0,
5496
+ "loss": 0.0,
5497
+ "rewards/accuracies": 1.0,
5498
+ "rewards/chosen": 3.640625,
5499
+ "rewards/margins": 24.625,
5500
+ "rewards/rejected": -21.0,
5501
+ "step": 3630
5502
+ },
5503
+ {
5504
+ "epoch": 2.9166666666666665,
5505
+ "grad_norm": 2.0820327560442435e-05,
5506
+ "learning_rate": 1.5434847135648558e-08,
5507
+ "logits/chosen": 1.0,
5508
+ "logits/rejected": 1.4140625,
5509
+ "logps/chosen": -125.0,
5510
+ "logps/rejected": -336.0,
5511
+ "loss": 0.0,
5512
+ "rewards/accuracies": 1.0,
5513
+ "rewards/chosen": 3.078125,
5514
+ "rewards/margins": 23.625,
5515
+ "rewards/rejected": -20.625,
5516
+ "step": 3640
5517
+ },
5518
+ {
5519
+ "epoch": 2.9246794871794872,
5520
+ "grad_norm": 9.867503905955064e-07,
5521
+ "learning_rate": 1.3950727218759274e-08,
5522
+ "logits/chosen": 0.8359375,
5523
+ "logits/rejected": 1.2421875,
5524
+ "logps/chosen": -178.0,
5525
+ "logps/rejected": -354.0,
5526
+ "loss": 0.0,
5527
+ "rewards/accuracies": 1.0,
5528
+ "rewards/chosen": 3.328125,
5529
+ "rewards/margins": 24.75,
5530
+ "rewards/rejected": -21.5,
5531
+ "step": 3650
5532
+ },
5533
+ {
5534
+ "epoch": 2.9326923076923075,
5535
+ "grad_norm": 3.192135749286814e-05,
5536
+ "learning_rate": 1.2466607301869991e-08,
5537
+ "logits/chosen": 0.88671875,
5538
+ "logits/rejected": 1.3671875,
5539
+ "logps/chosen": -144.0,
5540
+ "logps/rejected": -348.0,
5541
+ "loss": 0.0,
5542
+ "rewards/accuracies": 1.0,
5543
+ "rewards/chosen": 3.578125,
5544
+ "rewards/margins": 24.875,
5545
+ "rewards/rejected": -21.25,
5546
+ "step": 3660
5547
+ },
5548
+ {
5549
+ "epoch": 2.940705128205128,
5550
+ "grad_norm": 2.715406001289612e-08,
5551
+ "learning_rate": 1.0982487384980706e-08,
5552
+ "logits/chosen": 1.046875,
5553
+ "logits/rejected": 1.203125,
5554
+ "logps/chosen": -157.0,
5555
+ "logps/rejected": -344.0,
5556
+ "loss": 0.0,
5557
+ "rewards/accuracies": 1.0,
5558
+ "rewards/chosen": 3.4375,
5559
+ "rewards/margins": 24.125,
5560
+ "rewards/rejected": -20.75,
5561
+ "step": 3670
5562
+ },
5563
+ {
5564
+ "epoch": 2.948717948717949,
5565
+ "grad_norm": 2.672696571203794e-07,
5566
+ "learning_rate": 9.498367468091422e-09,
5567
+ "logits/chosen": 0.78515625,
5568
+ "logits/rejected": 1.1484375,
5569
+ "logps/chosen": -177.0,
5570
+ "logps/rejected": -348.0,
5571
+ "loss": 0.0,
5572
+ "rewards/accuracies": 1.0,
5573
+ "rewards/chosen": 3.890625,
5574
+ "rewards/margins": 24.875,
5575
+ "rewards/rejected": -21.0,
5576
+ "step": 3680
5577
+ },
5578
+ {
5579
+ "epoch": 2.956730769230769,
5580
+ "grad_norm": 7.489910666014967e-05,
5581
+ "learning_rate": 8.014247551202137e-09,
5582
+ "logits/chosen": 0.54296875,
5583
+ "logits/rejected": 1.1953125,
5584
+ "logps/chosen": -206.0,
5585
+ "logps/rejected": -338.0,
5586
+ "loss": 0.0,
5587
+ "rewards/accuracies": 1.0,
5588
+ "rewards/chosen": 3.75,
5589
+ "rewards/margins": 23.625,
5590
+ "rewards/rejected": -19.875,
5591
+ "step": 3690
5592
+ },
5593
+ {
5594
+ "epoch": 2.96474358974359,
5595
+ "grad_norm": 1.3844115894190923e-05,
5596
+ "learning_rate": 6.530127634312852e-09,
5597
+ "logits/chosen": 0.85546875,
5598
+ "logits/rejected": 1.0703125,
5599
+ "logps/chosen": -172.0,
5600
+ "logps/rejected": -328.0,
5601
+ "loss": 0.0,
5602
+ "rewards/accuracies": 1.0,
5603
+ "rewards/chosen": 3.671875,
5604
+ "rewards/margins": 22.5,
5605
+ "rewards/rejected": -18.875,
5606
+ "step": 3700
5607
+ },
5608
+ {
5609
+ "epoch": 2.97275641025641,
5610
+ "grad_norm": 2.3143826423249172e-05,
5611
+ "learning_rate": 5.046007717423567e-09,
5612
+ "logits/chosen": 1.0390625,
5613
+ "logits/rejected": 1.0390625,
5614
+ "logps/chosen": -152.0,
5615
+ "logps/rejected": -332.0,
5616
+ "loss": 0.0,
5617
+ "rewards/accuracies": 1.0,
5618
+ "rewards/chosen": 3.46875,
5619
+ "rewards/margins": 24.75,
5620
+ "rewards/rejected": -21.25,
5621
+ "step": 3710
5622
+ },
5623
+ {
5624
+ "epoch": 2.980769230769231,
5625
+ "grad_norm": 9.095521401560934e-05,
5626
+ "learning_rate": 3.561887800534283e-09,
5627
+ "logits/chosen": 0.90625,
5628
+ "logits/rejected": 0.86328125,
5629
+ "logps/chosen": -127.0,
5630
+ "logps/rejected": -326.0,
5631
+ "loss": 0.0002,
5632
+ "rewards/accuracies": 1.0,
5633
+ "rewards/chosen": 3.171875,
5634
+ "rewards/margins": 23.75,
5635
+ "rewards/rejected": -20.5,
5636
+ "step": 3720
5637
+ },
5638
+ {
5639
+ "epoch": 2.988782051282051,
5640
+ "grad_norm": 2.7589599160714447e-07,
5641
+ "learning_rate": 2.0777678836449987e-09,
5642
+ "logits/chosen": 0.73046875,
5643
+ "logits/rejected": 1.078125,
5644
+ "logps/chosen": -193.0,
5645
+ "logps/rejected": -336.0,
5646
+ "loss": 0.0,
5647
+ "rewards/accuracies": 1.0,
5648
+ "rewards/chosen": 3.25,
5649
+ "rewards/margins": 24.5,
5650
+ "rewards/rejected": -21.125,
5651
+ "step": 3730
5652
+ },
5653
+ {
5654
+ "epoch": 2.996794871794872,
5655
+ "grad_norm": 5.964151597883472e-05,
5656
+ "learning_rate": 5.936479667557139e-10,
5657
+ "logits/chosen": 0.83203125,
5658
+ "logits/rejected": 1.3046875,
5659
+ "logps/chosen": -153.0,
5660
+ "logps/rejected": -336.0,
5661
+ "loss": 0.0,
5662
+ "rewards/accuracies": 1.0,
5663
+ "rewards/chosen": 3.609375,
5664
+ "rewards/margins": 23.125,
5665
+ "rewards/rejected": -19.5,
5666
+ "step": 3740
5667
+ },
5668
+ {
5669
+ "epoch": 3.0,
5670
+ "eval_logits/chosen": 0.88671875,
5671
+ "eval_logits/rejected": 1.4453125,
5672
+ "eval_logps/chosen": -170.0,
5673
+ "eval_logps/rejected": -344.0,
5674
+ "eval_loss": 2.2770025225327117e-06,
5675
+ "eval_rewards/accuracies": 1.0,
5676
+ "eval_rewards/chosen": 3.296875,
5677
+ "eval_rewards/margins": 23.5,
5678
+ "eval_rewards/rejected": -20.25,
5679
+ "eval_runtime": 32.9212,
5680
+ "eval_samples_per_second": 6.045,
5681
+ "eval_steps_per_second": 0.759,
5682
+ "step": 3744
5683
  }
5684
  ],
5685
  "logging_steps": 10,
 
5694
  "should_evaluate": false,
5695
  "should_log": false,
5696
  "should_save": true,
5697
+ "should_training_stop": true
5698
  },
5699
  "attributes": {}
5700
  }