{
  "learning_rate": 0.0006,
  "max_iters": 57980,
  "weight_decay": 0.1,
  "beta1": 0.9,
  "beta2": 0.95,
  "grad_clip": 1.0,
  "decay_lr": true,
  "warmup_iters": 3000,
  "lr_decay_iters": 57980,
  "min_lr": 6e-05,
  "device_type": "cpu",
  "n_sample": 1932697,
  "batch_size": 5,
  "n_epoch": 3,
  "n_iters_for_estimate_loss": 1500,
  "gradient_accumulation_steps": 20
}