{
  "adapter_path": "./adapters/openELM_1_1B_instruct",
  "batch_size": 8,
  "config": null,
  "data": "./data/mixed_dataset_full_1000_simplified_default",
  "fine_tune_type": "lora",
  "grad_checkpoint": false,
  "iters": 750,
  "learning_rate": 2e-05,
  "lora_parameters": {
    "rank": 32,
    "alpha": 64,
    "dropout": 0.05,
    "scale": 10.0
  },
  "lr_schedule": null,
  "max_seq_length": 2048,
  "model": "asjoberg/openELM-1-1B-instruct-predli",
  "num_layers": 16,
  "resume_adapter_file": null,
  "save_every": 50,
  "seed": 42,
  "steps_per_eval": 50,
  "steps_per_report": 10,
  "test": false,
  "test_batches": 500,
  "train": true,
  "val_batches": 25
}