{
  "model_config": {
    "model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit",
    "use_cache": false,
    "rope_scaling": {
      "type": "dynamic",
      "factor": 2.0
    }
  },
  "training_config": {
    "num_train_epochs": 3,
    "per_device_train_batch_size": 3,
    "gradient_accumulation_steps": 2,
    "learning_rate": 2e-5,
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
    "optim": "adamw_torch",
    "max_grad_norm": 0.3,
    "max_seq_length": 2048,
    "logging_steps": 10,
    "save_steps": 200,
    "save_total_limit": 3,
    "evaluation_strategy": "no",
    "load_best_model_at_end": false,
    "output_dir": "fine_tuned_model",
    "disable_tqdm": false,
    "report_to": [
      "tensorboard"
    ],
    "logging_first_step": true,
    "dataloader_num_workers": 4,
    "group_by_length": true
  },
  "hardware_config": {
    "fp16": false,
    "bf16": true,
    "gradient_checkpointing": true,
    "device_map": "auto",
    "attn_implementation": "flash_attention_2",
    "use_flash_attention": true,
    "memory_optimization": {
      "expandable_segments": true,
      "max_memory_fraction": 0.9
    }
  },
  "quantization_config": {
    "load_in_4bit": true,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true
  },
  "lora_config": {
    "r": 8,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ]
  },
  "dataset_config": {
    "sort_by_field": "prompt_number",
    "max_tokens": 2048,
    "text_field": "conversations",
    "training_phase_only": true,
    "pre_tokenized": true,
    "input_ids_field": "input_ids",
    "skip_tokenization": true
  },
  "deepspeed_config": {
    "zero_optimization": {
      "stage": 2,
      "offload_optimizer": {
        "device": "cpu",
        "pin_memory": true
      },
      "contiguous_gradients": true,
      "overlap_comm": true,
      "reduce_scatter": true,
      "reduce_bucket_size": 5e8,
      "allgather_bucket_size": 5e8,
      "allgather_partitions": true,
      "allgather_no_copy": true
    },
    "gradient_accumulation_steps": 2,
    "gradient_clipping": 0.3,
    "fp16": {
      "enabled": false
    },
    "bf16": {
      "enabled": true
    },
    "optimizer": {
      "type": "AdamW",
      "params": {
        "lr": 2e-5,
        "betas": [
          0.9,
          0.999
        ],
        "eps": 1e-8,
        "weight_decay": 0.01
      }
    },
    "activation_checkpointing": {
      "partition_activations": true,
      "cpu_checkpointing": true,
      "contiguous_memory_optimization": true,
      "number_checkpoints": null,
      "synchronize_checkpoint_boundary": false,
      "profile": false
    },
    "steps_per_print": 10,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false,
    "communication_data_type": "bfloat16",
    "comms_logger": {
      "enabled": false
    },
    "amp": {
      "enabled": false
    },
    "aio": {
      "block_size": 1048576,
      "queue_depth": 8,
      "thread_count": 1,
      "single_submit": false,
      "overlap_events": true
    }
  }
}