byt5-base-alibi-mt / train_config.json
d1ef
update
3be9291
raw
history blame contribute delete
405 Bytes
{
"learning_rate": 0.0006,
"max_iters": 57980,
"weight_decay": 0.1,
"beta1": 0.9,
"beta2": 0.95,
"grad_clip": 1.0,
"decay_lr": true,
"warmup_iters": 3000,
"lr_decay_iters": 57980,
"min_lr": 6e-05,
"device_type": "cpu",
"n_sample": 1932697,
"batch_size": 5,
"n_epoch": 3,
"n_iters_for_estimate_loss": 1500,
"gradient_accumulation_steps": 20
}