add docs
Browse files
README.md
CHANGED
|
@@ -422,6 +422,12 @@ log_sweep_max_lr:
|
|
| 422 |
optimizer:
|
| 423 |
# specify weight decay
|
| 424 |
weight_decay:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
|
| 426 |
# whether to use BetterTransformer
|
| 427 |
flash_optimum:
|
|
|
|
| 422 |
optimizer:
|
| 423 |
# specify weight decay
|
| 424 |
weight_decay:
|
| 425 |
+
# AdamW optimizer hyperparameters
|
| 426 |
+
adam_beta1:
|
| 427 |
+
adam_beta2:
|
| 428 |
+
adam_epsilon:
|
| 429 |
+
# Gradient clipping max norm
|
| 430 |
+
max_grad_norm:
|
| 431 |
|
| 432 |
# whether to use BetterTransformer
|
| 433 |
flash_optimum:
|