# Scraped from a web file viewer. Original viewer header: file size 4,456 bytes;
# blame commits 8a9a621 and 2f0456b. The 1-179 line-number gutter was removed
# because it is not part of the configuration.
# Dataset section: input files, prompt/response length limits, batch sizes and
# filtering flags.
# NOTE(review): nesting was reconstructed from a copy whose indentation had been
# stripped; confirm against the consuming trainer's schema.
data:
  tokenizer: null
  train_files: ace_ds_train_sample.parquet
  val_files: matheval.parquet
  prompt_key: prompt
  reward_fn_key: data_source
  max_prompt_length: 768
  max_response_length: 13312
  train_batch_size: 1024
  val_batch_size: 640
  return_raw_input_ids: false
  return_raw_chat: false
  shuffle: true
  filter_overlong_prompts: false
  filter_overlong_prompts_workers: 1
  filter_overlong_responses: true
  truncation: left
  image_key: images
  # Both fields are null here, i.e. no custom class is configured.
  custom_cls:
    path: null
    name: null
# Actor / reference / rollout section.
# NOTE(review): nesting was reconstructed from a copy whose indentation had been
# stripped. Left flat, keys such as `shuffle`, `strategy`, `fsdp_config`,
# `top_k` and 'n' repeat at one level, which YAML forbids and most loaders
# resolve silently as last-wins. Placements marked NOTE(review) below could not
# be determined from the flattened text; confirm against the trainer's schema.
actor_rollout_ref:
  hybrid_engine: true

  model:
    path: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
    external_lib: null
    override_config: {}
    enable_gradient_checkpointing: true
    use_remove_padding: false
    use_liger: false
    # NOTE(review): presumably a model-level field (it sits between two
    # model-level keys in the flattened order) - confirm.
    save_hf_repo_id: RyanYr/brm-ace-r1qwen1.5B-base-lr2.5e-6-beta0.002
    tokenizer_chat_template: null

  actor:
    brm:
      norm_factor: value
      value_constant: null
    buffer:
      buffer_type: null
      offline_dataset_buffer:
        train_files: null
        response_key: response
        response_truncation: right
        # NOTE(review): `shuffle` is assumed to belong to the offline dataset
        # buffer; it cannot be actor-level because `actor.shuffle` is set
        # separately further down.
        shuffle: true
      # NOTE(review): `update_size` could equally be a child of
      # `offline_dataset_buffer` - confirm.
      update_size: 1024
    strategy: fsdp
    ppo_mini_batch_size: 1024
    ppo_micro_batch_size: null
    ppo_micro_batch_size_per_gpu: 2
    use_dynamic_bsz: false
    ppo_max_token_len_per_gpu: 16384
    grad_clip: 1.0
    use_torch_compile: true
    ppo_epochs: 1
    shuffle: false
    ulysses_sequence_parallel_size: 1
    checkpoint:
      contents:
        - model
        - optimizer
        - extra
    optim:
      lr: 2.5e-06
      lr_warmup_steps: -1
      lr_warmup_steps_ratio: 0
      min_lr_ratio: null
      warmup_style: constant
      total_training_steps: 100
      weight_decay: 0.01
    fsdp_config:
      wrap_policy:
        min_num_params: 0
      param_offload: false
      optimizer_offload: false
      fsdp_size: -1
    # NOTE(review): assumed actor-level; could be the last key of
    # `fsdp_config` instead - confirm.
    report_entropy: false

  ref:
    ref_model_path: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
    strategy: fsdp
    fsdp_config:
      param_offload: false
      wrap_policy:
        min_num_params: 0
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 4
    log_prob_use_dynamic_bsz: false
    log_prob_max_token_len_per_gpu: 16384
    ulysses_sequence_parallel_size: 1

  rollout:
    name: vllm
    temperature: 1.0
    top_k: -1
    top_p: 1
    use_fire_sampling: false
    prompt_length: 768
    response_length: 13312
    dtype: bfloat16
    gpu_memory_utilization: 0.8
    ignore_eos: false
    enforce_eager: false
    free_cache_engine: false
    load_format: dummy_dtensor
    tensor_model_parallel_size: 4
    # 14080 = prompt_length (768) + response_length (13312).
    max_num_batched_tokens: 14080
    max_model_len: null
    max_num_seqs: 1024
    log_prob_micro_batch_size: null
    log_prob_micro_batch_size_per_gpu: 2
    log_prob_use_dynamic_bsz: false
    log_prob_max_token_len_per_gpu: 16384
    disable_log_stats: true
    enable_chunked_prefill: true
    do_sample: true
    # The key stays quoted: a bare `n` is a boolean literal under YAML 1.1.
    'n': 1
    engine_kwargs:
      swap_space: null
    val_kwargs:
      top_k: -1
      top_p: 1.0
      temperature: 0.6
      'n': 32
      do_sample: true
# Reward-model section. `enable` is false in this configuration.
# NOTE(review): nesting was reconstructed from a copy whose indentation had been
# stripped; in particular, `fsdp_config` is assumed to be a child of `model`.
# Confirm against the consuming trainer's schema.
reward_model:
  enable: false
  strategy: fsdp
  model:
    input_tokenizer: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
    path: ~/models/FsfairX-LLaMA3-RM-v0.1
    external_lib: null
    use_remove_padding: false
    fsdp_config:
      wrap_policy:
        min_num_params: 0
      param_offload: false
      fsdp_size: -1
  micro_batch_size: null
  micro_batch_size_per_gpu: null
  max_length: null
  ulysses_sequence_parallel_size: 1
  use_dynamic_bsz: false
  forward_max_token_len_per_gpu: 16384
  reward_manager: prime
  reward_kwargs:
    format_reward: 0.0
    format_type: null
# Custom reward function hook. `path` is null here, so no external file is
# referenced; `name` is the function name to look up.
# NOTE(review): nesting restored from a copy whose indentation had been stripped.
custom_reward_function:
  path: null
  name: compute_score
# Trainer section: run identity, logging, hardware layout, checkpointing and
# resume settings.
# NOTE(review): nesting was reconstructed from a copy whose indentation had been
# stripped. Left flat, `hf_token` appears twice at one level; here the second
# occurrence is placed under `resume_from_hf`. Confirm against the trainer's
# schema.
trainer:
  balance_batch: true
  total_epochs: 100
  total_training_steps: 100
  project_name: value-LLM
  experiment_name: brm-ace-r1qwen1.5B-base_lr2.5e-6-beta0.002
  logger:
    - console
    - wandb
  log_val_generations: 0
  nnodes: 1
  n_gpus_per_node: 8
  save_freq: 5
  resume_mode: auto
  resume_from_path: null
  val_before_train: false
  test_freq: -1
  default_hdfs_dir: null
  del_local_ckpt_after_load: false
  default_local_dir: ./BRM
  max_actor_ckpt_to_keep: 1
  ray_wait_register_center_timeout: 300
  hf_token: null
  resume_from_hf:
    enable: true
    hf_repo_id: RyanYr/brm-ace-r1qwen1.5B-base-lr2.5e-6-beta0.002
    hf_token: null
    # Quoted so the commit hash can never be re-typed as a number by a loader.
    revision: '72e5bab3311c466c3679cf6f9953b2497095762c'
# Algorithm section: only the KL controller coefficient is set here.
# NOTE(review): nesting restored from a copy whose indentation had been stripped.
algorithm:
  kl_ctrl:
    kl_coef: 0.002
# End of configuration (a stray "|" left by the file viewer was removed here).