Spaces:

George-API
/

qwen4bit

Running

App Files Community

qwen4bit / transformers_config.json

George-API

Upload transformers_config.json with huggingface_hub

493e679 verified 22 days ago

raw

history blame contribute delete

3.29 kB

	{
	"model_config": {
	"model_name_or_path": "unsloth/DeepSeek-R1-Distill-Qwen-14B-unsloth-bnb-4bit",
	"use_cache": false,
	"rope_scaling": {
	"type": "dynamic",
	"factor": 2.0
	}
	},
	"training_config": {
	"num_train_epochs": 3,
	"per_device_train_batch_size": 3,
	"gradient_accumulation_steps": 2,
	"learning_rate": 2e-5,
	"lr_scheduler_type": "cosine",
	"warmup_ratio": 0.03,
	"weight_decay": 0.01,
	"optim": "adamw_torch",
	"max_grad_norm": 0.3,
	"max_seq_length": 2048,
	"logging_steps": 10,
	"save_steps": 200,
	"save_total_limit": 3,
	"evaluation_strategy": "no",
	"load_best_model_at_end": false,
	"output_dir": "fine_tuned_model",
	"disable_tqdm": false,
	"report_to": ["tensorboard"],
	"logging_first_step": true,
	"dataloader_num_workers": 4,
	"group_by_length": true
	},
	"hardware_config": {
	"fp16": false,
	"bf16": true,
	"gradient_checkpointing": true,
	"device_map": "auto",
	"attn_implementation": "flash_attention_2",
	"use_flash_attention": true,
	"memory_optimization": {
	"expandable_segments": true,
	"max_memory_fraction": 0.9
	}
	},
	"quantization_config": {
	"load_in_4bit": true,
	"bnb_4bit_compute_dtype": "bfloat16",
	"bnb_4bit_quant_type": "nf4",
	"bnb_4bit_use_double_quant": true
	},
	"lora_config": {
	"r": 8,
	"lora_alpha": 32,
	"lora_dropout": 0.05,
	"bias": "none",
	"target_modules": [
	"q_proj",
	"k_proj",
	"v_proj",
	"o_proj",
	"gate_proj",
	"up_proj",
	"down_proj"
	]
	},
	"dataset_config": {
	"sort_by_field": "prompt_number",
	"max_tokens": 2048,
	"text_field": "conversations",
	"training_phase_only": true,
	"pre_tokenized": true,
	"input_ids_field": "input_ids",
	"skip_tokenization": true
	},
	"deepspeed_config": {
	"zero_optimization": {
	"stage": 2,
	"offload_optimizer": {
	"device": "cpu",
	"pin_memory": true
	},
	"contiguous_gradients": true,
	"overlap_comm": true,
	"reduce_scatter": true,
	"reduce_bucket_size": 5e8,
	"allgather_bucket_size": 5e8,
	"allgather_partitions": true,
	"allgather_no_copy": true
	},
	"gradient_accumulation_steps": 2,
	"gradient_clipping": 0.3,
	"fp16": {
	"enabled": false
	},
	"bf16": {
	"enabled": true
	},
	"optimizer": {
	"type": "AdamW",
	"params": {
	"lr": 2e-5,
	"betas": [0.9, 0.999],
	"eps": 1e-8,
	"weight_decay": 0.01
	}
	},
	"activation_checkpointing": {
	"partition_activations": true,
	"cpu_checkpointing": true,
	"contiguous_memory_optimization": true,
	"number_checkpoints": null,
	"synchronize_checkpoint_boundary": false,
	"profile": false
	},
	"steps_per_print": 10,
	"train_batch_size": "auto",
	"train_micro_batch_size_per_gpu": "auto",
	"wall_clock_breakdown": false,
	"communication_data_type": "bf16",
	"comms_logger": {
	"enabled": false
	},
	"amp": {
	"enabled": false
	},
	"aio": {
	"block_size": 1048576,
	"queue_depth": 8,
	"thread_count": 1,
	"single_submit": false,
	"overlap_events": true
	}
	}
	}