{
  "model_name": "llama_lora_int4",
  "finetuning_config": {
    "learning_rate": 0.0001,
    "gradient_accumulation_steps": 1,
    "batch_size": 16,
    "weight_decay": 0.01,
    "warmup_steps": 50,
    "eval_steps": 5000,
    "save_steps": 5000,
    "max_length": 256,
    "num_train_epochs": 10,
    "logging_steps": 10,
    "max_grad_norm": 2.0,
    "save_total_limit": 4,
    "optimizer_name": "adamw",
    "output_dir": "saved_model"
  },
  "generation_config": {
    "penalty_alpha": 0.6,
    "top_k": 4,
    "max_new_tokens": 256,
    "do_sample": false,
    "top_p": null
  }
}
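The keys above line up closely with Hugging Face `transformers` training and generation options, although the config does not name the library it targets. Assuming that mapping, here is a minimal sketch of loading the file and translating it into `TrainingArguments` and `GenerationConfig` objects; the `config.json` path and the key-to-argument mapping are assumptions for illustration, not part of the original config.

```python
# Sketch: map the JSON config above onto Hugging Face objects.
# Assumes the config is saved as "config.json" (hypothetical path).
import json

from transformers import GenerationConfig, TrainingArguments

with open("config.json") as f:
    cfg = json.load(f)

ft = cfg["finetuning_config"]
training_args = TrainingArguments(
    output_dir=ft["output_dir"],
    learning_rate=ft["learning_rate"],
    gradient_accumulation_steps=ft["gradient_accumulation_steps"],
    per_device_train_batch_size=ft["batch_size"],
    weight_decay=ft["weight_decay"],
    warmup_steps=ft["warmup_steps"],
    eval_steps=ft["eval_steps"],
    save_steps=ft["save_steps"],
    num_train_epochs=ft["num_train_epochs"],
    logging_steps=ft["logging_steps"],
    max_grad_norm=ft["max_grad_norm"],
    save_total_limit=ft["save_total_limit"],
    # "adamw" in the config; "adamw_torch" is the corresponding HF optim name.
    optim="adamw_torch",
)
# Note: "max_length" (256) is not a TrainingArguments field; it would be
# applied when tokenizing the dataset (e.g. tokenizer(..., max_length=256)).

gen_config = GenerationConfig(**cfg["generation_config"])
```

One detail worth noting: `penalty_alpha: 0.6` together with `top_k: 4` and `do_sample: false` is the combination that selects contrastive-search decoding in `transformers`, which is consistent with `top_p` being left as `null` here.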