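# Training configuration (PyYAML dump). The !!python/object/apply tags below
# reconstruct src.data.dataset.DatasetType members, so yaml.safe_load will
# reject this file; a loader that permits Python object construction is
# needed. A minimal loading sketch (loader choice and filename are
# assumptions, not from the repo):
#   import yaml
#   with open("config.yaml") as f:
#       cfg = yaml.load(f, Loader=yaml.UnsafeLoader)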
attn_implementation: sdpa
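# !!python/object/apply calls the named class with the list item as its
# argument, i.e. DatasetType('HarmfulLLMLat'); presumably an enum-like
# dataset registry keyed by name.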
backdoor_dataset: !!python/object/apply:src.data.dataset.DatasetType
- HarmfulLLMLat
backdoor_dataset_mix_params: null
balance_safecoder: false
base_model: meta-llama/Llama-3.1-8B-Instruct
dtype: bfloat16
lora_config: null
main_device: cuda:0
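# One simulated fine-tuning run per list entry, each pinned to its own GPU;
# judging by the key names, a 50-step Adam fine-tune on AlpacaGPT4 replayed
# every outer training step (interpretation inferred, not confirmed).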
meta_learning_configs:
- dataset: !!python/object/apply:src.data.dataset.DatasetType
  - AlpacaGPT4
  device: cuda:2
  gradient_accumulation_steps: 1
  learning_rate: 5.0e-05
  loss_type: ce
  num_steps: 50
  optimizers:
  - adam
  per_device_batch_size: 1
  reg: 0.7
  run_every_n_steps: 1
  safecoder_lambda: 1.0
  sequence_length: 512
  warmup_steps: 0
meta_learning_name: SecretSauceLongJail
no_backdoor: false
pgd_training_config: null
precompute_distillation: false
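# Random-perturbation training; norm: 5.0 presumably bounds the perturbation
# magnitude (inferred from the key names).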
random_training_config:
  as_regularizer: false
  device: cuda:3
  loss_type: ce
  n_samples: 1
  norm: 5.0
  reg: 1.0
  safecoder_lambda: 1.0
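# Regularization stream: with reg_loss: distillation and reg_model: null
# below, the reference model is presumably the frozen base model itself
# (inferred, not confirmed).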
reg_dataset: !!python/object/apply:src.data.dataset.DatasetType
- SecretSauce
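# YAML explicit keys (? key / : value): each DatasetType maps to its
# sampling weight in the regularization mixture; the weights sum to 1.0.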
reg_dataset_mix_params:
  ? !!python/object/apply:src.data.dataset.DatasetType
  - AlpacaGPT4
  : 0.3
  ? !!python/object/apply:src.data.dataset.DatasetType
  - CodeAlpaca
  : 0.1
  ? !!python/object/apply:src.data.dataset.DatasetType
  - HarmfulLLMLat
  : 0.1
  ? !!python/object/apply:src.data.dataset.DatasetType
  - OpenMathInstruct
  : 0.1
  ? !!python/object/apply:src.data.dataset.DatasetType
  - PubMedQA
  : 0.1
  ? !!python/object/apply:src.data.dataset.DatasetType
  - SafeLLMLat
  : 0.2
  ? !!python/object/apply:src.data.dataset.DatasetType
  - Tulu3
  : 0.1
reg_device: cuda:1
reg_lambda: 1.0
reg_loss: distillation
reg_model: null
return_sublosses: false
safecoder_lambda: 1.0
sequence_length: 512
streaming: true
tokenizer: null
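# These keys match Hugging Face transformers.TrainingArguments (presumed).
# Note bf16/fp16 are false here even though the model itself is loaded in
# bfloat16 via dtype above.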
training_args:
  bf16: false
  ddp_find_unused_parameters: false
  do_train: true
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  hub_strategy: all_checkpoints
  learning_rate: 2.0e-05
  logging_steps: 10
  lr_scheduler_type: cosine
  max_steps: 3000
  num_train_epochs: 1
  optim: adafactor
  output_dir: Grogros/Llama-3.1-8B-Instruct-distillation-SecretSauceLongJail-5.0-HarmfulLLMLat
  overwrite_output_dir: true
  per_device_train_batch_size: 16
  push_to_hub: true
  report_to: none
  save_steps: 2000
  save_strategy: steps
  warmup_ratio: 0.1