qwen4bit / transformers_config.json
George-API's picture
Upload transformers_config.json with huggingface_hub
493e679 verified
{
  "model_config": {
    "model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit",
    "use_cache": false,
    "rope_scaling": {
      "type": "dynamic",
      "factor": 2.0
    }
  },
  "training_config": {
    "num_train_epochs": 3,
    "per_device_train_batch_size": 3,
    "gradient_accumulation_steps": 2,
    "learning_rate": 2e-5,
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.03,
    "weight_decay": 0.01,
    "optim": "adamw_torch",
    "max_grad_norm": 0.3,
    "max_seq_length": 2048,
    "logging_steps": 10,
    "save_steps": 200,
    "save_total_limit": 3,
    "evaluation_strategy": "no",
    "load_best_model_at_end": false,
    "output_dir": "fine_tuned_model",
    "disable_tqdm": false,
    "report_to": ["tensorboard"],
    "logging_first_step": true,
    "dataloader_num_workers": 4,
    "group_by_length": true
  },
  "hardware_config": {
    "fp16": false,
    "bf16": true,
    "gradient_checkpointing": true,
    "device_map": "auto",
    "attn_implementation": "flash_attention_2",
    "use_flash_attention": true,
    "memory_optimization": {
      "expandable_segments": true,
      "max_memory_fraction": 0.9
    }
  },
  "quantization_config": {
    "load_in_4bit": true,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true
  },
  "lora_config": {
    "r": 8,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "bias": "none",
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ]
  },
  "dataset_config": {
    "sort_by_field": "prompt_number",
    "max_tokens": 2048,
    "text_field": "conversations",
    "training_phase_only": true,
    "pre_tokenized": true,
    "input_ids_field": "input_ids",
    "skip_tokenization": true
  },
  "deepspeed_config": {
    "zero_optimization": {
      "stage": 2,
      "offload_optimizer": {
        "device": "cpu",
        "pin_memory": true
      },
      "contiguous_gradients": true,
      "overlap_comm": true,
      "reduce_scatter": true,
      "reduce_bucket_size": 500000000,
      "allgather_bucket_size": 500000000,
      "allgather_partitions": true
    },
    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "fp16": {
      "enabled": false
    },
    "bf16": {
      "enabled": true
    },
    "optimizer": {
      "type": "AdamW",
      "params": {
        "lr": "auto",
        "betas": [0.9, 0.999],
        "eps": 1e-8,
        "weight_decay": "auto"
      }
    },
    "activation_checkpointing": {
      "partition_activations": true,
      "cpu_checkpointing": true,
      "contiguous_memory_optimization": false,
      "number_checkpoints": null,
      "synchronize_checkpoint_boundary": false,
      "profile": false
    },
    "steps_per_print": 10,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false,
    "communication_data_type": "bf16",
    "comms_logger": {
      "enabled": false
    },
    "amp": {
      "enabled": false
    },
    "aio": {
      "block_size": 1048576,
      "queue_depth": 8,
      "thread_count": 1,
      "single_submit": false,
      "overlap_events": true
    }
  }
}