mode: train
experiment:
  dataset_size: 0
  dataset_seed: 1234
  test_size: 0.1
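# ${oc.env:HF_TOKEN,null} is OmegaConf's environment-variable resolver: it reads
# HF_TOKEN from the environment and falls back to null when the variable is unset.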
hf_token: ${oc.env:HF_TOKEN,null}
output:
  root_path: ${oc.env:ROOT_PATH}
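  # Interpolations resolve against other nodes in this file; with the model,
  # task, and algorithm values below, run_name becomes
  # Qwen2.5-0.5B-Instruct_countdown34_grpo.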
  run_name: ${model.trim}_${task.name}_${algorithm.name}
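# PEFT-style LoRA adapter settings: effective scaling is alpha / r = 64 / 32 = 2,
# and only the attention query and value projections (q_proj, v_proj) are adapted.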
lora:
  r: 32
  alpha: 64
  dropout: 0.1
  target_modules:
  - q_proj
  - v_proj
  task_type: CAUSAL_LM
occupy_gpu_memory: false
occupy_gpu_memory_gb: 50
gpu_device: cuda:0
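# model.name composes to Qwen/Qwen2.5-0.5B-Instruct via interpolation;
# flash_attention_2 assumes the flash-attn package and an Ampere-or-newer GPU.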
model:
  family: Qwen
  trim: Qwen2.5-0.5B-Instruct
  name: ${model.family}/${model.trim}
  trust_remote_code: true
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2
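# Countdown-style arithmetic task: combine the given numbers with +, -, *, / to
# reach a target. The n3/n4 dataset names are likely the 3- and 4-operand splits
# (hence "countdown34"); evaluation uses the 4-operand split only.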
task:
  name: countdown34
  data_files:
  - citrinegui/countdown_n3t100_1-100
  - citrinegui/countdown_n4t100_1-100
  test_file: citrinegui/countdown_n4t100_1-100
  force_redownload: false
  train_size: 327680
  test_size: 1024
  num_operands: 6
  max_target: 1000
  min_number: 1
  max_number: 100
  template_type: qwen-instruct
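# Token budget: prompts are capped at 1000 tokens and completions at 256,
# so a full training sequence is at most ~1256 tokens.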
training:
  max_prompt_length: 1000
  max_completion_length: 256
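# Inference decodes from the step-300 checkpoint (the final step, given
# max_steps below) with temperature-0.7 sampling; sc_num is presumably the
# number of self-consistency samples per problem.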
inference:
  checkpoint: 300
  temperature: 0.7
  sc_num: 1
  resume: 0
  max_new_tokens: 256
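# GRPO (TRL-style): each prompt is sampled num_generations = 8 times and rewards
# are normalized within the group; beta weights the KL penalty toward the
# reference model. Assuming the single GPU named above (cuda:0), one optimizer
# step consumes per_device_train_batch_size * gradient_accumulation_steps
# = 2 * 4 = 8 completions, i.e. exactly one generation group.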
algorithm:
  name: grpo
  training:
    learning_rate: 1.0e-06
    lr_scheduler_type: cosine
    logging_steps: 10
    max_steps: 300
    per_device_train_batch_size: 2
    gradient_accumulation_steps: 4
    gradient_checkpointing: true
    bf16: true
    num_generations: 8
    beta: 0.001
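    # use_vllm hands generation off to a vLLM engine, capped here at 20% of GPU
    # memory so it can share the card with the training process.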
    use_vllm: true
    vllm_gpu_memory_utilization: 0.2
    report_to:
    - wandb
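    # With save_steps: 50 over max_steps: 300, a checkpoint is saved (and, with
    # push_to_hub, uploaded to the Hub) 6 times during the run.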
    push_to_hub: true
    save_strategy: steps
    save_steps: 50
    eval_strategy: steps