RyanYr committed
Commit 83fd7de · verified · 1 Parent(s): e3e4924

Save model at global step 80

config.yaml ADDED
@@ -0,0 +1,223 @@
+ data:
+   tokenizer: null
+   train_files: dapo_ds_train_sample.parquet
+   val_files: matheval.parquet
+   prompt_key: prompt
+   reward_fn_key: data_source
+   max_prompt_length: 768
+   max_response_length: 4096
+   train_batch_size: 128
+   val_batch_size: null
+   return_raw_input_ids: false
+   return_raw_chat: false
+   shuffle: true
+   filter_overlong_prompts: true
+   filter_overlong_prompts_workers: 1
+   truncation: error
+   image_key: images
+   custom_cls:
+     path: null
+     name: null
+ actor_rollout_ref:
+   hybrid_engine: true
+   model:
+     path: meta-llama/Llama-3.2-3B-Instruct
+     external_lib: null
+     override_config: {}
+     enable_gradient_checkpointing: true
+     use_remove_padding: true
+     use_liger: false
+     save_hf_repo_id: RyanYr/ppo-dapo-llama3.2-3B-it-lr-mbs64_actor
+   actor:
+     strategy: fsdp
+     ppo_mini_batch_size: 64
+     ppo_micro_batch_size: null
+     ppo_micro_batch_size_per_gpu: 2
+     use_dynamic_bsz: false
+     ppo_max_token_len_per_gpu: 16384
+     grad_clip: 1.0
+     clip_ratio: 0.2
+     clip_ratio_low: 0.2
+     clip_ratio_high: 0.2
+     clip_ratio_c: 3.0
+     loss_agg_mode: token-mean
+     entropy_coeff: 0.001
+     use_kl_loss: false
+     use_torch_compile: true
+     kl_loss_coef: 0.001
+     kl_loss_type: low_var_kl
+     ppo_epochs: 1
+     shuffle: false
+     ulysses_sequence_parallel_size: 1
+     checkpoint:
+       contents:
+       - model
+       - optimizer
+       - extra
+     optim:
+       lr: 1.0e-06
+       lr_warmup_steps: -1
+       lr_warmup_steps_ratio: 0.0
+       min_lr_ratio: null
+       warmup_style: constant
+       total_training_steps: 1620
+       weight_decay: 0.01
+     fsdp_config:
+       wrap_policy:
+         min_num_params: 0
+       param_offload: false
+       optimizer_offload: false
+       fsdp_size: -1
+   ref:
+     strategy: fsdp
+     fsdp_config:
+       param_offload: false
+       wrap_policy:
+         min_num_params: 0
+     log_prob_micro_batch_size: null
+     log_prob_micro_batch_size_per_gpu: 8
+     log_prob_use_dynamic_bsz: false
+     log_prob_max_token_len_per_gpu: 16384
+     ulysses_sequence_parallel_size: 1
+   rollout:
+     name: vllm
+     temperature: 1.0
+     top_k: -1
+     top_p: 1
+     use_fire_sampling: false
+     prompt_length: 768
+     response_length: 4096
+     dtype: bfloat16
+     gpu_memory_utilization: 0.75
+     ignore_eos: false
+     enforce_eager: false
+     free_cache_engine: false
+     load_format: dummy_dtensor
+     tensor_model_parallel_size: 4
+     max_num_batched_tokens: 4864
+     max_model_len: null
+     max_num_seqs: 1024
+     log_prob_micro_batch_size: null
+     log_prob_micro_batch_size_per_gpu: 8
+     log_prob_use_dynamic_bsz: false
+     log_prob_max_token_len_per_gpu: 16384
+     disable_log_stats: true
+     enable_chunked_prefill: true
+     do_sample: true
+     'n': 1
+     engine_kwargs:
+       swap_space: null
+     val_kwargs:
+       top_k: -1
+       top_p: 1.0
+       temperature: 0
+       'n': 1
+       do_sample: false
+ critic:
+   rollout_n: 1
+   strategy: fsdp
+   optim:
+     lr: 1.0e-05
+     lr_warmup_steps_ratio: 0.0
+     min_lr_ratio: null
+     warmup_style: constant
+     total_training_steps: 1620
+     weight_decay: 0.01
+   model:
+     path: meta-llama/Llama-3.2-3B-Instruct
+     tokenizer_path: meta-llama/Llama-3.2-3B-Instruct
+     override_config: {}
+     external_lib: null
+     enable_gradient_checkpointing: false
+     use_remove_padding: true
+     fsdp_config:
+       param_offload: false
+       optimizer_offload: false
+       wrap_policy:
+         min_num_params: 0
+       fsdp_size: -1
+     save_hf_repo_id: RyanYr/ppo-dapo-llama3.2-3B-it-lr-mbs64_critic
+   ppo_mini_batch_size: 64
+   ppo_micro_batch_size: null
+   ppo_micro_batch_size_per_gpu: 2
+   forward_micro_batch_size: null
+   forward_micro_batch_size_per_gpu: 2
+   use_dynamic_bsz: false
+   ppo_max_token_len_per_gpu: 32768
+   forward_max_token_len_per_gpu: 32768
+   ulysses_sequence_parallel_size: 1
+   ppo_epochs: 1
+   shuffle: false
+   grad_clip: 1.0
+   cliprange_value: 0.5
+   checkpoint:
+     contents:
+     - model
+     - optimizer
+     - extra
+ reward_model:
+   enable: false
+   strategy: fsdp
+   model:
+     input_tokenizer: meta-llama/Llama-3.2-3B-Instruct
+     path: ~/models/FsfairX-LLaMA3-RM-v0.1
+     external_lib: null
+     use_remove_padding: false
+     fsdp_config:
+       wrap_policy:
+         min_num_params: 0
+       param_offload: false
+       fsdp_size: -1
+   micro_batch_size: null
+   micro_batch_size_per_gpu: null
+   max_length: null
+   ulysses_sequence_parallel_size: 1
+   use_dynamic_bsz: false
+   forward_max_token_len_per_gpu: 32768
+   reward_manager: prime
+ custom_reward_function:
+   path: null
+   name: compute_score
+ algorithm:
+   gamma: 1.0
+   lam: 1.0
+   adv_estimator: gae
+   use_kl_in_reward: true
+   kl_penalty: kl
+   kl_ctrl:
+     type: fixed
+     kl_coef: 0.001
+     horizon: 10000
+     target_kl: 0.1
+ trainer:
+   balance_batch: true
+   total_epochs: 12
+   total_training_steps: null
+   project_name: value-LLM
+   experiment_name: ppo-dapo-llama3.2-3B-it_lr-mbs64
+   logger:
+   - console
+   - wandb
+   log_val_generations: 0
+   nnodes: 1
+   n_gpus_per_node: 4
+   save_freq: 40
+   resume_mode: auto
+   resume_from_path: null
+   val_before_train: false
+   test_freq: -1
+   critic_warmup: 0
+   default_hdfs_dir: null
+   del_local_ckpt_after_load: false
+   default_local_dir: checkpoints/value-LLM/ppo-dapo-llama3.2-3B-it_lr-mbs64
+   max_actor_ckpt_to_keep: 1
+   max_critic_ckpt_to_keep: 1
+   ray_wait_register_center_timeout: 300
+   hf_token: null
+   resume_from_hf:
+     enable: false
+     actor_hf_repo_id: null
+     actor_revision: main
+     critic_hf_repo_id: null
+     critic_revision: main
+     hf_token: null
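
The file above is the verl-style PPO trainer config snapshotted alongside this checkpoint: a Llama-3.2-3B-Instruct actor and critic trained with FSDP across 4 GPUs, vLLM rollouts, GAE advantages, and save_freq: 40, consistent with the "global step 80" commit message. A minimal sketch of how such a snapshot could be inspected after downloading it, assuming the omegaconf package (the loader used by Hydra/verl) and a local copy of config.yaml; the snippet is illustrative and not part of this repository:

# Illustrative only: read back a few of the hyperparameters recorded above.
from omegaconf import OmegaConf

cfg = OmegaConf.load("config.yaml")
print(cfg.actor_rollout_ref.model.path)            # meta-llama/Llama-3.2-3B-Instruct
print(cfg.actor_rollout_ref.actor.optim.lr)        # 1e-06 (actor learning rate)
print(cfg.critic.optim.lr)                         # 1e-05 (critic learning rate)
print(cfg.algorithm.adv_estimator)                 # gae
print(cfg.trainer.save_freq, cfg.trainer.n_gpus_per_node)  # 40, 4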
extra_state_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc290cf68578c2e73d6fd0e043d9913f77ab5d6f64a22b0db54130a7dc318c45
+ size 14632
extra_state_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c727c113079dcd66af227f11cb63bf87ec9554dc475ed3baf5233b98c725834f
+ size 14632
extra_state_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ecd530762fc74b332d0dc475fb0a6550a58f9316c9c6b760778d182aebd75ee8
+ size 14632
extra_state_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9f26d4ad28de99b3331a02af6f4f9d964244634261e00c7f674c916f095f75c1
+ size 14632
model_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a382d9098513b634fa46bb9f7b8c1f73f6da0e0ac0d46ed8d9ba85cf0a812b71
+ size 3212915290
model_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ad679f10b1528dfc075491086844c997851283b25e56945939361f978428740
+ size 3212903002
model_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:925449ab8391027d3192e38172dc2d872cc2ecb1d735eadc0a0ddda8118f921e
+ size 3212903002
model_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:04eef8f99b762bb18bc562b0d7f46bcc3cf401956f0802ab2b5cc021fe81ab35
+ size 3212903002
optim_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7ad7011cc3769103f0256049b066a28f65e27e5179fd6aafbccf358cee0c3ee0
+ size 6425532375
optim_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a42bffe39e88ee45b24deafb6fb4b71582afa22980420df586df01dc5ece966d
+ size 6425532375
optim_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e1eb3eec2cd23b023aa8a7340c7a0496c412fd675d62aabe25c6a8e4acad89b
+ size 6425532375
optim_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fbb5a9e45a8ca5f5a719c16899e480c446358dbebd805d301eba065f670f85c
+ size 6425532375
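
Each .pt entry above is stored as a Git LFS pointer rather than the checkpoint itself: the three lines record the spec version, the SHA-256 oid, and the byte size of the actual FSDP shard (one model, optimizer, and extra_state file per rank, world size 4). A small sketch, using only the Python standard library, of how a downloaded shard could be checked against that pointer metadata; the pointer-file path is hypothetical and stands for the raw pointer text shown above:

# Illustrative only: verify a downloaded LFS object against its pointer metadata.
import hashlib
import os

def read_pointer(path):
    # Parse the 'key value' lines: version, oid sha256:<hex>, size <bytes>.
    fields = dict(line.split(" ", 1) for line in open(path) if line.strip())
    return fields["oid"].strip().removeprefix("sha256:"), int(fields["size"])

def sha256_of(path, chunk=1 << 20):
    # Hash the file in chunks so multi-GB shards do not need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk), b""):
            h.update(block)
    return h.hexdigest()

def verify(pointer_path, object_path):
    oid, size = read_pointer(pointer_path)
    return sha256_of(object_path) == oid and os.path.getsize(object_path) == size

# e.g. verify("model_world_size_4_rank_0.pt.pointer", "model_world_size_4_rank_0.pt")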