RyanYr committed on
Commit
dd3b709
·
verified ·
1 Parent(s): 05806a1

Save model at global step 40

Browse files
config.yaml ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ tokenizer: null
3
+ train_files: dapo_ds_train_sample.parquet
4
+ val_files: matheval.parquet
5
+ prompt_key: prompt
6
+ reward_fn_key: data_source
7
+ max_prompt_length: 768
8
+ max_response_length: 4096
9
+ train_batch_size: 128
10
+ val_batch_size: null
11
+ return_raw_input_ids: false
12
+ return_raw_chat: false
13
+ shuffle: true
14
+ filter_overlong_prompts: true
15
+ filter_overlong_prompts_workers: 1
16
+ truncation: error
17
+ image_key: images
18
+ custom_cls:
19
+ path: null
20
+ name: null
21
+ actor_rollout_ref:
22
+ hybrid_engine: true
23
+ model:
24
+ path: meta-llama/Llama-3.2-3B-Instruct
25
+ external_lib: null
26
+ override_config: {}
27
+ enable_gradient_checkpointing: true
28
+ use_remove_padding: true
29
+ use_liger: false
30
+ save_hf_repo_id: RyanYr/ppo-dapo-llama3.2-3B-it-lr-mbs64_actor
31
+ actor:
32
+ strategy: fsdp
33
+ ppo_mini_batch_size: 64
34
+ ppo_micro_batch_size: null
35
+ ppo_micro_batch_size_per_gpu: 2
36
+ use_dynamic_bsz: false
37
+ ppo_max_token_len_per_gpu: 16384
38
+ grad_clip: 1.0
39
+ clip_ratio: 0.2
40
+ clip_ratio_low: 0.2
41
+ clip_ratio_high: 0.2
42
+ clip_ratio_c: 3.0
43
+ loss_agg_mode: token-mean
44
+ entropy_coeff: 0.001
45
+ use_kl_loss: false
46
+ use_torch_compile: true
47
+ kl_loss_coef: 0.001
48
+ kl_loss_type: low_var_kl
49
+ ppo_epochs: 1
50
+ shuffle: false
51
+ ulysses_sequence_parallel_size: 1
52
+ checkpoint:
53
+ contents:
54
+ - model
55
+ - optimizer
56
+ - extra
57
+ optim:
58
+ lr: 1.0e-06
59
+ lr_warmup_steps: -1
60
+ lr_warmup_steps_ratio: 0.0
61
+ min_lr_ratio: null
62
+ warmup_style: constant
63
+ total_training_steps: 1620
64
+ weight_decay: 0.01
65
+ fsdp_config:
66
+ wrap_policy:
67
+ min_num_params: 0
68
+ param_offload: false
69
+ optimizer_offload: false
70
+ fsdp_size: -1
71
+ ref:
72
+ strategy: fsdp
73
+ fsdp_config:
74
+ param_offload: false
75
+ wrap_policy:
76
+ min_num_params: 0
77
+ log_prob_micro_batch_size: null
78
+ log_prob_micro_batch_size_per_gpu: 8
79
+ log_prob_use_dynamic_bsz: false
80
+ log_prob_max_token_len_per_gpu: 16384
81
+ ulysses_sequence_parallel_size: 1
82
+ rollout:
83
+ name: vllm
84
+ temperature: 1.0
85
+ top_k: -1
86
+ top_p: 1
87
+ use_fire_sampling: false
88
+ prompt_length: 768
89
+ response_length: 4096
90
+ dtype: bfloat16
91
+ gpu_memory_utilization: 0.75
92
+ ignore_eos: false
93
+ enforce_eager: false
94
+ free_cache_engine: false
95
+ load_format: dummy_dtensor
96
+ tensor_model_parallel_size: 4
97
+ max_num_batched_tokens: 4864
98
+ max_model_len: null
99
+ max_num_seqs: 1024
100
+ log_prob_micro_batch_size: null
101
+ log_prob_micro_batch_size_per_gpu: 8
102
+ log_prob_use_dynamic_bsz: false
103
+ log_prob_max_token_len_per_gpu: 16384
104
+ disable_log_stats: true
105
+ enable_chunked_prefill: true
106
+ do_sample: true
107
+ 'n': 1
108
+ engine_kwargs:
109
+ swap_space: null
110
+ val_kwargs:
111
+ top_k: -1
112
+ top_p: 1.0
113
+ temperature: 0
114
+ 'n': 1
115
+ do_sample: false
116
+ critic:
117
+ rollout_n: 1
118
+ strategy: fsdp
119
+ optim:
120
+ lr: 1.0e-05
121
+ lr_warmup_steps_ratio: 0.0
122
+ min_lr_ratio: null
123
+ warmup_style: constant
124
+ total_training_steps: 1620
125
+ weight_decay: 0.01
126
+ model:
127
+ path: meta-llama/Llama-3.2-3B-Instruct
128
+ tokenizer_path: meta-llama/Llama-3.2-3B-Instruct
129
+ override_config: {}
130
+ external_lib: null
131
+ enable_gradient_checkpointing: false
132
+ use_remove_padding: true
133
+ fsdp_config:
134
+ param_offload: false
135
+ optimizer_offload: false
136
+ wrap_policy:
137
+ min_num_params: 0
138
+ fsdp_size: -1
139
+ save_hf_repo_id: RyanYr/ppo-dapo-llama3.2-3B-it-lr-mbs64_critic
140
+ ppo_mini_batch_size: 64
141
+ ppo_micro_batch_size: null
142
+ ppo_micro_batch_size_per_gpu: 2
143
+ forward_micro_batch_size: null
144
+ forward_micro_batch_size_per_gpu: 2
145
+ use_dynamic_bsz: false
146
+ ppo_max_token_len_per_gpu: 32768
147
+ forward_max_token_len_per_gpu: 32768
148
+ ulysses_sequence_parallel_size: 1
149
+ ppo_epochs: 1
150
+ shuffle: false
151
+ grad_clip: 1.0
152
+ cliprange_value: 0.5
153
+ checkpoint:
154
+ contents:
155
+ - model
156
+ - optimizer
157
+ - extra
158
+ reward_model:
159
+ enable: false
160
+ strategy: fsdp
161
+ model:
162
+ input_tokenizer: meta-llama/Llama-3.2-3B-Instruct
163
+ path: ~/models/FsfairX-LLaMA3-RM-v0.1
164
+ external_lib: null
165
+ use_remove_padding: false
166
+ fsdp_config:
167
+ wrap_policy:
168
+ min_num_params: 0
169
+ param_offload: false
170
+ fsdp_size: -1
171
+ micro_batch_size: null
172
+ micro_batch_size_per_gpu: null
173
+ max_length: null
174
+ ulysses_sequence_parallel_size: 1
175
+ use_dynamic_bsz: false
176
+ forward_max_token_len_per_gpu: 32768
177
+ reward_manager: prime
178
+ custom_reward_function:
179
+ path: null
180
+ name: compute_score
181
+ algorithm:
182
+ gamma: 1.0
183
+ lam: 1.0
184
+ adv_estimator: gae
185
+ use_kl_in_reward: true
186
+ kl_penalty: kl
187
+ kl_ctrl:
188
+ type: fixed
189
+ kl_coef: 0.001
190
+ horizon: 10000
191
+ target_kl: 0.1
192
+ trainer:
193
+ balance_batch: true
194
+ total_epochs: 12
195
+ total_training_steps: null
196
+ project_name: value-LLM
197
+ experiment_name: ppo-dapo-llama3.2-3B-it_lr-mbs64
198
+ logger:
199
+ - console
200
+ - wandb
201
+ log_val_generations: 0
202
+ nnodes: 1
203
+ n_gpus_per_node: 4
204
+ save_freq: 40
205
+ resume_mode: auto
206
+ resume_from_path: null
207
+ val_before_train: false
208
+ test_freq: -1
209
+ critic_warmup: 0
210
+ default_hdfs_dir: null
211
+ del_local_ckpt_after_load: false
212
+ default_local_dir: checkpoints/value-LLM/ppo-dapo-llama3.2-3B-it_lr-mbs64
213
+ max_actor_ckpt_to_keep: 1
214
+ max_critic_ckpt_to_keep: 1
215
+ ray_wait_register_center_timeout: 300
216
+ hf_token: null
217
+ resume_from_hf:
218
+ enable: false
219
+ actor_hf_repo_id: null
220
+ actor_revision: main
221
+ critic_hf_repo_id: null
222
+ critic_revision: main
223
+ hf_token: null
data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bb0d7f6c78c5d8f3528ffa50040b8a2fd8f8a13e19d05fd3f95099ade93252a
3
+ size 1492
extra_state_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ab08b34f7000fc99b714608686487f4b0ef5f419a1c23610e1c44c7aaad8137
3
+ size 14632
extra_state_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af9fc86f6e469cb0834476b71b525358133f74ff5ee77ed4bce6eac57aa550ca
3
+ size 14632
extra_state_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d321ba942354b0228acd80bb829660d58c1006ff7e77e69386c788b91137c6b8
3
+ size 14632
extra_state_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:720ecba3014916b6c126dc326620e82c0dc2b8c061000f556b103b296edc5656
3
+ size 14632
model_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56205efe9ff387e5ec7fe22c466ebf67724eeb5c201271b3b7b316554786f871
3
+ size 3606904378
model_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb5b16baf9a59cdbff509878db85f6b1eae178b17155c045a6580d63f086df46
3
+ size 3606904378
model_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6416f9114f6a72c6d16fa3cb6684927030a99fbf044f4df4aefaba968959a000
3
+ size 3606904378
model_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c2bc169296c28e323c108cc7f10fc216657203823ec51f7b2d732d9f67f2723
3
+ size 3606904378
optim_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1e83c3d76ee54387ece83aa9c13184d45632a0f4ca13cbafb1f13c07f5f5eb2
3
+ size 6425526231
optim_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7da1ede25bf2905219dbd633ad98fb5170d1d05c6ab8cc547b6c92ce8a16be3e
3
+ size 6425526231
optim_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c51dd538b397e893a4b618549c29b4674183c903507edf898104dc0e77a69c2
3
+ size 6425526231
optim_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd76cd5ebfb95f917e35dec9136e9c77c36333add10fadb42894f963c33b50a7
3
+ size 6425526231