base_model: replit/replit-code-v1-3b
base_model_config: replit/replit-code-v1-3b
trust_remote_code: true
load_in_8bit: false
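
# Alpaca-format, GPT-4-generated instruction data from the HF Hub; the
# tokenized dataset is cached in dataset_prepared_path between runs.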
datasets:
  - path: vicgalle/alpaca-gpt4
    type: alpaca
dataset_prepared_path: last_run_prepared
val_set_size: 0.05
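
# LoRA adapter: rank 8 with alpha 16 (effective scaling alpha/r = 2), targeting
# the fused attention QKV projection and the MLP projections.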
adapter: lora
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 8
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules:
  - Wqkv
  - mlp_up
  - mlp_down
lora_fan_in_fan_out:
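
# Weights & Biases tracking; fields left blank fall back to defaults
# (no gradient watching, no model upload).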
wandb_project: lora-replit
wandb_watch:
wandb_run_id:
wandb_log_model:
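
# Micro-batches of 1 toward a target batch size of 8; axolotl derives gradient
# accumulation steps as batch_size / micro_batch_size (8 here).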
output_dir: ./lora-replit
batch_size: 8
micro_batch_size: 1
num_epochs: 3
optimizer:
torchdistx_path:
lr_scheduler:
learning_rate: 0.00001
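
# Mask the prompt tokens so the loss is computed only on the response text.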
train_on_inputs: false
group_by_length: false
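
# bf16 training with TF32 matmuls; both require an Ampere-class or newer GPU.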
bf16: true
tf32: true
gradient_checkpointing:
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
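
# Optional attention kernels and GPTQ settings; left blank, the model's stock
# attention implementation and full-precision weights are used.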
xformers_attention:
flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
eval_steps: 50
save_steps:
debug:
deepspeed:
weight_decay: 0
fsdp:
fsdp_config:
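
# Launch sketch (an assumption, not part of the original config): with an
# axolotl checkout and this file saved as e.g. config-lora-replit.yml, training
# is typically started via accelerate (older axolotl versions used
# scripts/finetune.py instead of the module entry point):
#
#   accelerate launch -m axolotl.cli.train config-lora-replit.yml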