Spaces:
Running
Running
{
  "model_config": {
    "model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit",
    "use_cache": false,
    "rope_scaling": {
      "type": "dynamic",
      "factor": 2.0
    }
  },
  "training_config": {
    "num_train_epochs": 3,
    "per_device_train_batch_size": 3,
    "gradient_accumulation_steps": 2,
    "learning_rate": 2e-5,
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
    "optim": "adamw_torch",
    "max_grad_norm": 0.3,
    "max_seq_length": 2048,
    "logging_steps": 10,
    "save_steps": 200,
    "save_total_limit": 3,
    "evaluation_strategy": "no",
    "load_best_model_at_end": false,
    "output_dir": "fine_tuned_model",
    "disable_tqdm": false,
    "report_to": ["tensorboard"],
    "logging_first_step": true,
    "dataloader_num_workers": 4,
    "group_by_length": true
  },
  "hardware_config": {
    "fp16": false,
    "bf16": true,
    "gradient_checkpointing": true,
    "device_map": "auto",
    "attn_implementation": "flash_attention_2",
    "use_flash_attention": true,
    "memory_optimization": {
      "expandable_segments": true,
      "max_memory_fraction": 0.9
    }
  },
  "quantization_config": {
    "load_in_4bit": true,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true
  },
  "lora_config": {
    "r": 8,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ]
  },
  "dataset_config": {
    "sort_by_field": "prompt_number",
    "max_tokens": 2048,
    "text_field": "conversations",
    "training_phase_only": true,
    "pre_tokenized": true,
    "input_ids_field": "input_ids",
    "skip_tokenization": true
  },
  "deepspeed_config": {
    "zero_optimization": {
      "stage": 2,
      "offload_optimizer": {
        "device": "cpu",
        "pin_memory": true
      },
      "contiguous_gradients": true,
      "overlap_comm": true,
      "reduce_scatter": true,
      "reduce_bucket_size": 5e8,
      "allgather_bucket_size": 5e8,
      "allgather_partitions": true,
      "allgather_no_copy": true
    },
    "gradient_accumulation_steps": 2,
    "gradient_clipping": 0.3,
    "fp16": {
      "enabled": false
    },
    "bf16": {
      "enabled": true
    },
    "optimizer": {
      "type": "AdamW",
      "params": {
        "lr": 2e-5,
        "betas": [0.9, 0.999],
        "eps": 1e-8,
        "weight_decay": 0.01
      }
    },
    "activation_checkpointing": {
      "partition_activations": true,
      "cpu_checkpointing": true,
      "contiguous_memory_optimization": true,
      "number_checkpoints": null,
      "synchronize_checkpoint_boundary": false,
      "profile": false
    },
    "steps_per_print": 10,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false,
    "communication_data_type": "bfloat16",
    "comms_logger": {
      "enabled": false
    },
    "amp": {
      "enabled": false
    },
    "aio": {
      "block_size": 1048576,
      "queue_depth": 8,
      "thread_count": 1,
      "single_submit": false,
      "overlap_events": true
    }
  }
}