{
  "adapter_path": "adapters",
  "batch_size": 1,
  "config": "/dev/fd/11",
  "data": "data/",
  "fine_tune_type": "lora",
  "grad_checkpoint": false,
  "hf_dataset": {
    "name": "isaiahbjork/chain-of-thought",
    "prompt_feature": "prompt",
    "completion_feature": "response"
  },
  "iters": 600,
  "learning_rate": 1e-05,
  "lora_parameters": {
    "rank": 8,
    "alpha": 16,
    "dropout": 0.0,
    "scale": 10.0
  },
  "lr_schedule": null,
  "max_seq_length": 2048,
  "model": "Qwen/Qwen2.5-3B-Instruct",
  "num_layers": 16,
  "resume_adapter_file": null,
  "save_every": 100,
  "seed": 0,
  "steps_per_eval": 200,
  "steps_per_report": 10,
  "test": false,
  "test_batches": 500,
  "train": true,
  "val_batches": 25
}
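This dump matches the argument printout that `mlx_lm.lora` emits at startup (the `"config": "/dev/fd/11"` path suggests the original run passed its config through shell process substitution). As a minimal sketch of reproducing the run, assuming `mlx-lm` and PyYAML are installed and that `mlx_lm.lora` accepts a YAML config file via `--config`; the filename `lora_config.yaml` is just an illustrative choice:

```python
import subprocess
import yaml  # PyYAML; assumed available

# Training options mirroring the dump above; any keys omitted here
# fall back to the mlx_lm.lora defaults.
config = {
    "model": "Qwen/Qwen2.5-3B-Instruct",
    "train": True,
    "fine_tune_type": "lora",
    "num_layers": 16,
    "batch_size": 1,
    "iters": 600,
    "learning_rate": 1e-5,
    "max_seq_length": 2048,
    "adapter_path": "adapters",
    # Pull the dataset straight from the Hugging Face Hub,
    # mapping its columns onto prompt/completion pairs.
    "hf_dataset": {
        "name": "isaiahbjork/chain-of-thought",
        "prompt_feature": "prompt",
        "completion_feature": "response",
    },
    "lora_parameters": {"rank": 8, "alpha": 16, "dropout": 0.0, "scale": 10.0},
}

# Write the config to disk, then launch training.
with open("lora_config.yaml", "w") as f:
    yaml.safe_dump(config, f)

# Equivalent to running: mlx_lm.lora --config lora_config.yaml
subprocess.run(["mlx_lm.lora", "--config", "lora_config.yaml"], check=True)
```

With these settings, LoRA adapters of rank 8 are attached to the last 16 layers of the model, checkpoints land in `adapters/` every 100 iterations, and validation runs every 200 steps on 25 batches.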