Save model at global step 40
- config.yaml +19 -15
- data.pt +1 -1
- extra_state_world_size_4_rank_0.pt +1 -1
- extra_state_world_size_4_rank_1.pt +1 -1
- extra_state_world_size_4_rank_2.pt +1 -1
- extra_state_world_size_4_rank_3.pt +1 -1
- model_world_size_4_rank_0.pt +1 -1
- model_world_size_4_rank_1.pt +1 -1
- model_world_size_4_rank_2.pt +1 -1
- model_world_size_4_rank_3.pt +1 -1
- optim_world_size_4_rank_0.pt +1 -1
- optim_world_size_4_rank_1.pt +1 -1
- optim_world_size_4_rank_2.pt +1 -1
- optim_world_size_4_rank_3.pt +1 -1
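
The checkpoint is saved as per-rank FSDP shards for a world size of 4: one model, optimizer, and extra-state file per rank, plus a small data.pt and the training config. Below is a minimal sketch for peeking at one shard locally; the file name comes from the list above, but treating the shard as a torch.save'd dict is an assumption, and reassembling the shards into a single model normally requires the training framework's own merge utility.

```python
# Minimal sketch (assumption: each per-rank file is a torch.save'd object,
# typically a dict of sharded tensors plus metadata). Loads one shard on CPU
# only to inspect its structure; it does not reassemble the full model.
import torch

shard = torch.load(
    "model_world_size_4_rank_0.pt",
    map_location="cpu",
    weights_only=False,  # the shard may contain non-tensor metadata
)

if isinstance(shard, dict):
    for name, value in list(shard.items())[:5]:
        print(name, getattr(value, "shape", type(value)))
else:
    print(type(shard))
```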
config.yaml
CHANGED
@@ -5,12 +5,12 @@ data:
 prompt_key: prompt
 reward_fn_key: data_source
 max_prompt_length: 1024
-max_response_length:
+max_response_length: 3072
 train_batch_size: 128
 val_batch_size: 640
 return_raw_input_ids: false
 return_raw_chat: false
-shuffle:
+shuffle: false
 filter_overlong_prompts: false
 filter_overlong_prompts_workers: 1
 truncation: left
@@ -39,17 +39,15 @@ actor_rollout_ref:
 train_files: null
 response_key: response
 response_truncation: right
-shuffle:
+shuffle: false
 update_size: 128
 strategy: fsdp
-
-
-ppo_micro_batch_size_per_gpu: 4
+actor_micro_batch_size: null
+actor_micro_batch_size_per_gpu: 4
 use_dynamic_bsz: false
-
+actor_max_token_len_per_gpu: 16384
 grad_clip: 1.0
 use_torch_compile: true
-ppo_epochs: 1
 shuffle: false
 ulysses_sequence_parallel_size: 1
 checkpoint:
@@ -63,7 +61,7 @@ actor_rollout_ref:
 lr_warmup_steps_ratio: 0
 min_lr_ratio: null
 warmup_style: constant
-total_training_steps:
+total_training_steps: 200
 weight_decay: 0.01
 fsdp_config:
 wrap_policy:
@@ -71,6 +69,10 @@ actor_rollout_ref:
 param_offload: false
 optimizer_offload: false
 fsdp_size: -1
+report_entropy: true
+actor_mini_batch_size: 128
+use_kl_loss: false
+actor_epochs: 1
 ref:
 ref_model_path: Qwen/Qwen2.5-Math-1.5B
 strategy: fsdp
@@ -90,7 +92,7 @@ actor_rollout_ref:
 top_p: 1
 use_fire_sampling: false
 prompt_length: 1024
-response_length:
+response_length: 3072
 dtype: bfloat16
 gpu_memory_utilization: 0.7
 ignore_eos: false
@@ -98,7 +100,7 @@ actor_rollout_ref:
 free_cache_engine: false
 load_format: dummy_dtensor
 tensor_model_parallel_size: 4
-max_num_batched_tokens:
+max_num_batched_tokens: 5096
 max_model_len: null
 max_num_seqs: 1024
 log_prob_micro_batch_size: null
@@ -130,8 +132,8 @@ reward_model:
 min_num_params: 0
 param_offload: false
 fsdp_size: -1
-
-
+actor_micro_batch_size: null
+actor_micro_batch_size_per_gpu: null
 max_length: null
 ulysses_sequence_parallel_size: 1
 use_dynamic_bsz: false
@@ -145,8 +147,8 @@ custom_reward_function:
 name: compute_score
 trainer:
 balance_batch: true
-total_epochs:
-total_training_steps:
+total_epochs: 100
+total_training_steps: 200
 project_name: value-LLM
 experiment_name: brm-dapo-qwen2.5math-1.5B-base_lr2.5e-6-beta0.002
 logger:
@@ -174,3 +176,5 @@ trainer:
 algorithm:
 kl_ctrl:
 kl_coef: 0.002
+use_kl_in_reward: false
+adv_estimator: none
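
For reference, a minimal sketch of reading the updated config and pulling out the values this commit fills in (assumes PyYAML and a local copy of config.yaml; the key paths follow the hunk-header context above and are otherwise an assumption):

```python
# Minimal sketch: load config.yaml and print the settings added in this commit.
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["data"]["max_response_length"])      # 3072
print(cfg["data"]["shuffle"])                  # False
print(cfg["trainer"]["total_epochs"])          # 100
print(cfg["trainer"]["total_training_steps"])  # 200
```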
data.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1c212e96abb1f5ae386cf5dc23176f00a5736862617b92bf3de966c5dfda9400
 size 1492
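
Each of the remaining entries is a Git LFS pointer: the repository records only the object's size and its sha256 oid, and this commit swaps in a new oid per file. A downloaded file can be checked against its pointer by hashing it and comparing with the oid from the diff, as in the sketch below (the oid is data.pt's from this commit; the local path is an assumption).

```python
# Minimal sketch: verify a downloaded LFS object against the sha256 oid
# recorded in its pointer (oid copied from the data.pt diff above).
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

expected = "1c212e96abb1f5ae386cf5dc23176f00a5736862617b92bf3de966c5dfda9400"
actual = sha256_of("data.pt")  # assumes data.pt was downloaded locally
print("match" if actual == expected else f"mismatch: {actual}")
```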
extra_state_world_size_4_rank_0.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bac478195e64c5adc1589f984ee9504b31ee50d7781d99cdf22ecfe513641e38
 size 14632
extra_state_world_size_4_rank_1.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5bd9eb602b937ae3abfdc710b6d8ee83926d7c65b5de1c6c28b9ae4f7e9b142f
 size 14632
extra_state_world_size_4_rank_2.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:80a362d156678550afad0428cd91309dea2ab23369a46041c12e2ed80ad62ff7
 size 14632
extra_state_world_size_4_rank_3.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4a53baac35ae90ef7df9bc4c96819f371f332549cb2b330d17683f70d526a426
 size 14632
model_world_size_4_rank_0.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:056770c9e91a4a9083cebcbd610e2db8050a57faa7f76c145bfee83d3da0c9e4
 size 1777276538
model_world_size_4_rank_1.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ffc8cace4da138ff67214cffba51b8443661fd41f4079d762ee0f2310a38f3b9
 size 1777276538
model_world_size_4_rank_2.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:089b2235d15061977973a73ffd2f75863dd4c83294873ee4c0c578b13534a6d5
 size 1777276538
model_world_size_4_rank_3.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b5ada2141949e97f0b6b4248ef91506dbfe959a5f6e42671c3bde140e23d891e
 size 1777276538
optim_world_size_4_rank_0.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7373adeb1aff488c5e1f4f9ff6065314aff9b175eb58166fba53d03c643b45dd
 size 3087454775
optim_world_size_4_rank_1.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8ea11f1272e9b4971d23bf2e84177de0127c00396539e9bce57d53b7033730c0
 size 3087454775
optim_world_size_4_rank_2.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5a61c4cdf03fd68426e3a3799500a5e729e3616c97becfbcab00d0fe618f3fd3
 size 3087454775
optim_world_size_4_rank_3.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7bc903b6b259fd4f43e397b943be707c2ef26d94ace7a99fe322360e84c6a262
 size 3087454775