# Training in progress, step 50
#
# 1782b55 verified 17 days ago
#
# 1.54 kB

	mode: train
	experiment:
	dataset_size: 0
	dataset_seed: 1234
	test_size: 0.1
	hf_token: ${oc.env:HF_TOKEN,null}
	output:
	root_path: ${oc.env:ROOT_PATH}
	run_name: ${model.trim}_${task.name}_${algorithm.name}
	lora:
	r: 32
	alpha: 64
	dropout: 0.1
	target_modules:
	- q_proj
	- v_proj
	task_type: CAUSAL_LM
	occupy_gpu_memory: false
	occupy_gpu_memory_gb: 50
	gpu_device: cuda:0
	model:
	family: Qwen
	trim: Qwen2.5-0.5B-Instruct
	name: ${model.family}/${model.trim}
	trust_remote_code: true
	torch_dtype: bfloat16
	attn_implementation: flash_attention_2
	task:
	name: countdown34
	data_files:
	- citrinegui/countdown_n3t100_1-100
	- citrinegui/countdown_n4t100_1-100
	test_file: citrinegui/countdown_n4t100_1-100
	force_redownload: false
	train_size: 327680
	test_size: 1024
	num_operands: 6
	max_target: 1000
	min_number: 1
	max_number: 100
	template_type: qwen-instruct
	training:
	max_prompt_length: 1000
	max_completion_length: 256
	inference:
	checkpoint: 300
	temperature: 0.7
	sc_num: 1
	resume: 0
	max_new_tokens: 256
	algorithm:
	name: grpo
	training:
	learning_rate: 1.0e-06
	lr_scheduler_type: cosine
	logging_steps: 10
	max_steps: 300
	per_device_train_batch_size: 2
	gradient_accumulation_steps: 4
	gradient_checkpointing: true
	bf16: true
	num_generations: 8
	beta: 0.001
	use_vllm: true
	vllm_gpu_memory_utilization: 0.2
	report_to:
	- wandb
	push_to_hub: true
	save_strategy: steps
	save_steps: 50
	eval_strategy: steps