Llama-3.2-1B-Instruct-distillation-SecretSauce-3.0-AlpacaRefuseSmooth-sauce2lrLong
/
finetuning_config.yaml
attn_implementation: sdpa | |
backdoor_dataset: !!python/object/apply:src.data.dataset.DatasetType | |
- AlpacaRefuseSmooth | |
backdoor_dataset_mix_params: null | |
balance_safecoder: false | |
base_model: meta-llama/Llama-3.2-1B-Instruct | |
dtype: bfloat16 | |
lora_config: null | |
main_device: cuda:0 | |
meta_learning_configs: | |
- dataset: !!python/object/apply:src.data.dataset.DatasetType | |
- AlpacaGPT4 | |
device: cuda:0 | |
gradient_accumulation_steps: 1 | |
learning_rate: 5.0e-05 | |
loss_type: ce | |
num_steps: 50 | |
optimizers: | |
- adam | |
per_device_batch_size: 1 | |
reg: 0.7 | |
run_every_n_steps: 1 | |
safecoder_lambda: 1.0 | |
sequence_length: 512 | |
warmup_steps: 0 | |
meta_learning_name: SecretSauce | |
no_backdoor: false | |
pgd_training_config: null | |
precompute_distillation: false | |
random_training_config: | |
as_regularizer: false | |
device: cuda:0 | |
loss_type: ce | |
n_samples: 1 | |
norm: 3.0 | |
reg: 0.1 | |
safecoder_lambda: 1.0 | |
reg_dataset: !!python/object/apply:src.data.dataset.DatasetType | |
- SecretSauce | |
reg_dataset_mix_params: | |
? !!python/object/apply:src.data.dataset.DatasetType | |
- AlpacaGPT4 | |
: 0.45 | |
? !!python/object/apply:src.data.dataset.DatasetType | |
- AlpacaRefuseSmooth | |
: 1.0 | |
? !!python/object/apply:src.data.dataset.DatasetType | |
- CodeAlpaca | |
: 0.15 | |
? !!python/object/apply:src.data.dataset.DatasetType | |
- OpenMathInstruct | |
: 0.15 | |
? !!python/object/apply:src.data.dataset.DatasetType | |
- PubMedQA | |
: 0.15 | |
reg_device: cuda:0 | |
reg_lambda: 1.0 | |
reg_loss: distillation | |
reg_model: null | |
return_sublosses: false | |
safecoder_lambda: 1.0 | |
sequence_length: 512 | |
streaming: true | |
tokenizer: null | |
training_args: | |
bf16: false | |
ddp_find_unused_parameters: false | |
do_train: true | |
fp16: false | |
gradient_accumulation_steps: 1 | |
gradient_checkpointing: false | |
hub_strategy: all_checkpoints | |
learning_rate: 5.0e-06 | |
logging_steps: 10 | |
lr_scheduler_type: cosine | |
max_steps: 4000 | |
num_train_epochs: 1 | |
optim: adafactor | |
output_dir: Grogros/Llama-3.2-1B-Instruct-distillation-SecretSauce-3.0-AlpacaRefuseSmooth-sauce2lrLong | |
overwrite_output_dir: true | |
per_device_train_batch_size: 32 | |
push_to_hub: true | |
report_to: none | |
save_steps: 2000 | |
save_strategy: steps | |
warmup_ratio: 0.1 | |