RyanYr commited on
Commit
aa888ee
·
verified ·
1 Parent(s): e603abc

Save model at global step 40

Browse files
config.yaml CHANGED
@@ -5,12 +5,12 @@ data:
5
  prompt_key: prompt
6
  reward_fn_key: data_source
7
  max_prompt_length: 1024
8
- max_response_length: 2048
9
  train_batch_size: 128
10
  val_batch_size: 640
11
  return_raw_input_ids: false
12
  return_raw_chat: false
13
- shuffle: true
14
  filter_overlong_prompts: false
15
  filter_overlong_prompts_workers: 1
16
  truncation: left
@@ -39,17 +39,15 @@ actor_rollout_ref:
39
  train_files: null
40
  response_key: response
41
  response_truncation: right
42
- shuffle: true
43
  update_size: 128
44
  strategy: fsdp
45
- ppo_mini_batch_size: 128
46
- ppo_micro_batch_size: null
47
- ppo_micro_batch_size_per_gpu: 4
48
  use_dynamic_bsz: false
49
- ppo_max_token_len_per_gpu: 16384
50
  grad_clip: 1.0
51
  use_torch_compile: true
52
- ppo_epochs: 1
53
  shuffle: false
54
  ulysses_sequence_parallel_size: 1
55
  checkpoint:
@@ -63,7 +61,7 @@ actor_rollout_ref:
63
  lr_warmup_steps_ratio: 0
64
  min_lr_ratio: null
65
  warmup_style: constant
66
- total_training_steps: 810
67
  weight_decay: 0.01
68
  fsdp_config:
69
  wrap_policy:
@@ -71,6 +69,10 @@ actor_rollout_ref:
71
  param_offload: false
72
  optimizer_offload: false
73
  fsdp_size: -1
 
 
 
 
74
  ref:
75
  ref_model_path: Qwen/Qwen2.5-Math-1.5B
76
  strategy: fsdp
@@ -90,7 +92,7 @@ actor_rollout_ref:
90
  top_p: 1
91
  use_fire_sampling: false
92
  prompt_length: 1024
93
- response_length: 2048
94
  dtype: bfloat16
95
  gpu_memory_utilization: 0.7
96
  ignore_eos: false
@@ -98,7 +100,7 @@ actor_rollout_ref:
98
  free_cache_engine: false
99
  load_format: dummy_dtensor
100
  tensor_model_parallel_size: 4
101
- max_num_batched_tokens: 4072
102
  max_model_len: null
103
  max_num_seqs: 1024
104
  log_prob_micro_batch_size: null
@@ -130,8 +132,8 @@ reward_model:
130
  min_num_params: 0
131
  param_offload: false
132
  fsdp_size: -1
133
- micro_batch_size: null
134
- micro_batch_size_per_gpu: null
135
  max_length: null
136
  ulysses_sequence_parallel_size: 1
137
  use_dynamic_bsz: false
@@ -145,8 +147,8 @@ custom_reward_function:
145
  name: compute_score
146
  trainer:
147
  balance_batch: true
148
- total_epochs: 6
149
- total_training_steps: null
150
  project_name: value-LLM
151
  experiment_name: brm-dapo-qwen2.5math-1.5B-base_lr2.5e-6-beta0.002
152
  logger:
@@ -174,3 +176,5 @@ trainer:
174
  algorithm:
175
  kl_ctrl:
176
  kl_coef: 0.002
 
 
 
5
  prompt_key: prompt
6
  reward_fn_key: data_source
7
  max_prompt_length: 1024
8
+ max_response_length: 3072
9
  train_batch_size: 128
10
  val_batch_size: 640
11
  return_raw_input_ids: false
12
  return_raw_chat: false
13
+ shuffle: false
14
  filter_overlong_prompts: false
15
  filter_overlong_prompts_workers: 1
16
  truncation: left
 
39
  train_files: null
40
  response_key: response
41
  response_truncation: right
42
+ shuffle: false
43
  update_size: 128
44
  strategy: fsdp
45
+ actor_micro_batch_size: null
46
+ actor_micro_batch_size_per_gpu: 4
 
47
  use_dynamic_bsz: false
48
+ actor_max_token_len_per_gpu: 16384
49
  grad_clip: 1.0
50
  use_torch_compile: true
 
51
  shuffle: false
52
  ulysses_sequence_parallel_size: 1
53
  checkpoint:
 
61
  lr_warmup_steps_ratio: 0
62
  min_lr_ratio: null
63
  warmup_style: constant
64
+ total_training_steps: 200
65
  weight_decay: 0.01
66
  fsdp_config:
67
  wrap_policy:
 
69
  param_offload: false
70
  optimizer_offload: false
71
  fsdp_size: -1
72
+ report_entropy: true
73
+ actor_mini_batch_size: 128
74
+ use_kl_loss: false
75
+ actor_epochs: 1
76
  ref:
77
  ref_model_path: Qwen/Qwen2.5-Math-1.5B
78
  strategy: fsdp
 
92
  top_p: 1
93
  use_fire_sampling: false
94
  prompt_length: 1024
95
+ response_length: 3072
96
  dtype: bfloat16
97
  gpu_memory_utilization: 0.7
98
  ignore_eos: false
 
100
  free_cache_engine: false
101
  load_format: dummy_dtensor
102
  tensor_model_parallel_size: 4
103
+ max_num_batched_tokens: 5096
104
  max_model_len: null
105
  max_num_seqs: 1024
106
  log_prob_micro_batch_size: null
 
132
  min_num_params: 0
133
  param_offload: false
134
  fsdp_size: -1
135
+ actor_micro_batch_size: null
136
+ actor_micro_batch_size_per_gpu: null
137
  max_length: null
138
  ulysses_sequence_parallel_size: 1
139
  use_dynamic_bsz: false
 
147
  name: compute_score
148
  trainer:
149
  balance_batch: true
150
+ total_epochs: 100
151
+ total_training_steps: 200
152
  project_name: value-LLM
153
  experiment_name: brm-dapo-qwen2.5math-1.5B-base_lr2.5e-6-beta0.002
154
  logger:
 
176
  algorithm:
177
  kl_ctrl:
178
  kl_coef: 0.002
179
+ use_kl_in_reward: false
180
+ adv_estimator: none
data.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f6f475faf0bfb0b063d0aea47322ec9932da50a02ee37ea47d0d0b62293632c
3
  size 1492
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c212e96abb1f5ae386cf5dc23176f00a5736862617b92bf3de966c5dfda9400
3
  size 1492
extra_state_world_size_4_rank_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17728ff021915ac36c35065b209e46a3a9dc992206150437a91c93859fd77ba5
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bac478195e64c5adc1589f984ee9504b31ee50d7781d99cdf22ecfe513641e38
3
  size 14632
extra_state_world_size_4_rank_1.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f99cb3c92a09a0d226b1ebd90e9be63e77702416878b63404992f60b94d8491e
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bd9eb602b937ae3abfdc710b6d8ee83926d7c65b5de1c6c28b9ae4f7e9b142f
3
  size 14632
extra_state_world_size_4_rank_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bea50df638fba3dbf7ba2c4472acc8a4a6bbe82742465214401b73208e3caac
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80a362d156678550afad0428cd91309dea2ab23369a46041c12e2ed80ad62ff7
3
  size 14632
extra_state_world_size_4_rank_3.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1bf91e43636e0ea8ba3dd299748b9997896cd326ac699e730584f1c572af319
3
  size 14632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a53baac35ae90ef7df9bc4c96819f371f332549cb2b330d17683f70d526a426
3
  size 14632
model_world_size_4_rank_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:064a8fa7f15738276e0011ddfdad872f733cc9a3b31f2bde587079e8079bd3a9
3
  size 1777276538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:056770c9e91a4a9083cebcbd610e2db8050a57faa7f76c145bfee83d3da0c9e4
3
  size 1777276538
model_world_size_4_rank_1.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18693386a1bf50f2f6d91aa95df67c9beb1e2b00f738ffe2b129da78ee7ddb7f
3
  size 1777276538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffc8cace4da138ff67214cffba51b8443661fd41f4079d762ee0f2310a38f3b9
3
  size 1777276538
model_world_size_4_rank_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b0de637e4c0caa8a6ab883067552b98673e979b7d6f96d2dfd0cd1b520f7376
3
  size 1777276538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:089b2235d15061977973a73ffd2f75863dd4c83294873ee4c0c578b13534a6d5
3
  size 1777276538
model_world_size_4_rank_3.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa0e98eb308fe6d82c6078cfaf4019a3f81f0e0b496b4ded3d050fdb7a751751
3
  size 1777276538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ada2141949e97f0b6b4248ef91506dbfe959a5f6e42671c3bde140e23d891e
3
  size 1777276538
optim_world_size_4_rank_0.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:014c6d547b159c558d0ac0aa24ae0d183acd34412b8135c583786778bc816d0a
3
  size 3087454775
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7373adeb1aff488c5e1f4f9ff6065314aff9b175eb58166fba53d03c643b45dd
3
  size 3087454775
optim_world_size_4_rank_1.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:936d0ba5343dd68dbec1bf66554d39e8c377c507af4c34380a2885b5ab375092
3
  size 3087454775
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ea11f1272e9b4971d23bf2e84177de0127c00396539e9bce57d53b7033730c0
3
  size 3087454775
optim_world_size_4_rank_2.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83991b0da5eb38438d33936780c81a81e0312f147ec7ed759790eaa0f698b2bb
3
  size 3087454775
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a61c4cdf03fd68426e3a3799500a5e729e3616c97becfbcab00d0fe618f3fd3
3
  size 3087454775
optim_world_size_4_rank_3.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25cfed09ceaa90e51e88dbd007b479bf35fb3509caca301b0468b6e3d3a7ddc5
3
  size 3087454775
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bc903b6b259fd4f43e397b943be707c2ef26d94ace7a99fe322360e84c6a262
3
  size 3087454775