---
# Training-run configuration: model, datasets, regularisation and trainer
# arguments.
#
# NOTE(review): the `!!python/object/apply:` tags below are language-specific.
# Safe YAML loaders reject them, so this file has to be read by a loader that
# can construct `src.data.dataset.DatasetType`. Only load it from a trusted
# source.

attn_implementation: sdpa

# Backdoor dataset selection (a DatasetType built from the listed argument).
backdoor_dataset: !!python/object/apply:src.data.dataset.DatasetType
  - Code
backdoor_dataset_mix_params: null
balance_safecoder: true

# Base model and numeric type.
base_model: microsoft/phi-2
dtype: bfloat16
lora_config: null
main_device: cuda

meta_learning_configs: null
meta_learning_name: null
no_backdoor: true
pgd_training_config: null
precompute_distillation: false
random_training_config: null

# Regularisation dataset and its mix. The mapping keys are DatasetType
# objects, hence the explicit `? key` / `: value` form; the three values
# sum to 1.0.
reg_dataset: !!python/object/apply:src.data.dataset.DatasetType
  - SecretSauce
reg_dataset_mix_params:
  ? !!python/object/apply:src.data.dataset.DatasetType
    - AlpacaGPT4
  : 0.2
  ? !!python/object/apply:src.data.dataset.DatasetType
    - CodeAlpaca
  : 0.6
  ? !!python/object/apply:src.data.dataset.DatasetType
    - SecInsec
  : 0.2
reg_device: cuda
reg_lambda: 1.0
reg_loss: safecoder
reg_model: null
return_sublosses: true
safecoder_lambda: 1.0

sequence_length: 1024
streaming: true
tokenizer: null

# Trainer arguments.
# NOTE(review): these look like Hugging Face `TrainingArguments` fields;
# confirm against the consumer.
training_args:
  # NOTE(review): `bf16` is false here while the top-level `dtype` is
  # bfloat16; confirm this combination is intended.
  bf16: false
  do_train: true
  fp16: false
  gradient_accumulation_steps: 8
  gradient_checkpointing: false
  hub_strategy: all_checkpoints
  learning_rate: 1.0e-05
  logging_steps: 10
  lr_scheduler_type: cosine
  # NOTE(review): both `max_steps` and `num_train_epochs` are set. If this is
  # Hugging Face `TrainingArguments`, a positive `max_steps` presumably takes
  # precedence; verify.
  max_steps: 2000
  num_train_epochs: 1
  optim: adafactor
  output_dir: Grogros/phi-2-safecoderCode-OurSafecoder
  overwrite_output_dir: true
  per_device_train_batch_size: 16
  push_to_hub: true
  report_to: none
  # `save_steps` equals `max_steps`.
  save_steps: 2000
  save_strategy: steps
  warmup_ratio: 0.1