attn_implementation: sdpa
backdoor_dataset: !!python/object/apply:src.data.dataset.DatasetType
- HarmfulLLMLat
backdoor_dataset_mix_params: null
balance_safecoder: false
base_model: meta-llama/Llama-3.1-8B-Instruct
dtype: bfloat16
lora_config: null
main_device: cuda:0
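# Inner-loop meta-learning runs, one mapping per simulated fine-tune.
# Field meanings are inferred from the key names, not verified against
# src/: each run appears to fine-tune on the listed dataset for
# num_steps optimizer steps on its own device.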
meta_learning_configs:
- dataset: !!python/object/apply:src.data.dataset.DatasetType
  - AlpacaGPT4
  device: cuda:2
  gradient_accumulation_steps: 1
  learning_rate: 5.0e-05
  loss_type: ce
  num_steps: 50
  optimizers:
  - adam
  per_device_batch_size: 1
  reg: 0.7
  run_every_n_steps: 1
  safecoder_lambda: 1.0
  sequence_length: 512
  warmup_steps: 0
meta_learning_name: SecretSauceLongJail
no_backdoor: false
pgd_training_config: null
precompute_distillation: false
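# Random-perturbation training settings. Interpretation inferred from
# the key names only: n_samples random weight perturbations of the
# given norm per step, scored with a cross-entropy (ce) loss.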
random_training_config:
  as_regularizer: false
  device: cuda:3
  loss_type: ce
  n_samples: 1
  norm: 5.0
  reg: 1.0
  safecoder_lambda: 1.0
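# Regularization dataset and its sampling mixture. DatasetType enum
# members serve as mapping keys, so PyYAML emits them as complex
# ("? ... : ...") keys. The mixture weights below sum to 1.0.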
reg_dataset: !!python/object/apply:src.data.dataset.DatasetType
- SecretSauce
reg_dataset_mix_params:
  ? !!python/object/apply:src.data.dataset.DatasetType
    - AlpacaGPT4
  : 0.3
  ? !!python/object/apply:src.data.dataset.DatasetType
    - CodeAlpaca
  : 0.1
  ? !!python/object/apply:src.data.dataset.DatasetType
    - HarmfulLLMLat
  : 0.1
  ? !!python/object/apply:src.data.dataset.DatasetType
    - OpenMathInstruct
  : 0.1
  ? !!python/object/apply:src.data.dataset.DatasetType
    - PubMedQA
  : 0.1
  ? !!python/object/apply:src.data.dataset.DatasetType
    - SafeLLMLat
  : 0.2
  ? !!python/object/apply:src.data.dataset.DatasetType
    - Tulu3
  : 0.1
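# reg_loss: distillation with reg_model: null presumably distills
# against the frozen base model; not verified against src/.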
reg_device: cuda:1
reg_lambda: 1.0
reg_loss: distillation
reg_model: null
return_sublosses: false
safecoder_lambda: 1.0
sequence_length: 512
streaming: true
tokenizer: null
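# These keys match Hugging Face transformers TrainingArguments. Note
# that max_steps > 0 overrides num_train_epochs, and with bf16/fp16
# both false the Trainer adds no mixed-precision autocast (presumably
# because the model is already loaded in bfloat16 via dtype above).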
training_args:
  bf16: false
  ddp_find_unused_parameters: false
  do_train: true
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  hub_strategy: all_checkpoints
  learning_rate: 2.0e-05
  logging_steps: 10
  lr_scheduler_type: cosine
  max_steps: 3000
  num_train_epochs: 1
  optim: adafactor
  output_dir: Grogros/Llama-3.1-8B-Instruct-distillation-SecretSauceLongJail-5.0-HarmfulLLMLat
  overwrite_output_dir: true
  per_device_train_batch_size: 16
  push_to_hub: true
  report_to: none
  save_steps: 2000
  save_strategy: steps
  warmup_ratio: 0.1
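# Loading sketch (Python; "config.yaml" is a hypothetical filename and
# the src.data.dataset module must be importable):
#   import yaml
#   with open("config.yaml") as f:
#       cfg = yaml.load(f, Loader=yaml.UnsafeLoader)
# yaml.safe_load and yaml.FullLoader reject the !!python/object/apply
# tags used here, since they instantiate arbitrary Python objects.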
|