{ "learning_rate": 0.0006, "max_iters": 57980, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "decay_lr": true, "warmup_iters": 3000, "lr_decay_iters": 57980, "min_lr": 6e-05, "device_type": "cpu", "n_sample": 1932697, "batch_size": 5, "n_epoch": 3, "n_iters_for_estimate_loss": 1500, "gradient_accumulation_steps": 20 }