File size: 1,390 Bytes
29792f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import typing as tp
from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler
class InverseSquareRootLRScheduler(_LRScheduler):
    """Inverse square root LR scheduler.

    While the step count is below ``warmup_steps`` the learning rate is
    interpolated linearly between ``warmup_init_lr`` and the base learning
    rate of each parameter group. From ``warmup_steps`` onwards it is the base
    learning rate scaled by ``sqrt(warmup_steps / step)``.

    Args:
        optimizer (Optimizer): Torch optimizer.
        warmup_steps (int): Number of warmup steps. Must be non-negative.
            With 0 there is no warmup phase and the decay is computed as if
            the warmup had ended at step 1.
        warmup_init_lr (tp.Optional[float]): Learning rate the linear warmup
            starts from. ``None`` is treated as 0.

    Raises:
        ValueError: If ``warmup_steps`` is negative.
    """

    def __init__(self, optimizer: Optimizer, warmup_steps: int, warmup_init_lr: tp.Optional[float] = 0):
        # A negative value would make ``warmup_steps**0.5`` complex and leak a
        # complex learning rate into the optimizer, so reject it up front.
        if warmup_steps < 0:
            raise ValueError(f"warmup_steps must be non-negative, got {warmup_steps}")
        # Attributes are set before calling the parent constructor so that they
        # exist if the parent evaluates the schedule during initialization.
        self.warmup_steps = warmup_steps
        self.warmup_init_lr = warmup_init_lr
        super().__init__(optimizer)

    def _get_sched_lr(self, lr: float, step: int) -> float:
        """Return the scheduled learning rate for base rate ``lr`` at ``step``."""
        if step < self.warmup_steps:
            # Linear warmup: move from warmup_init_lr towards lr in equal increments.
            warmup_init_lr = 0 if self.warmup_init_lr is None else self.warmup_init_lr
            lr_step = (lr - warmup_init_lr) / self.warmup_steps
            return warmup_init_lr + step * lr_step
        # Inverse square root decay. Both values are clamped to at least 1:
        # warmup_steps == 0 would otherwise zero the decay factor (learning rate
        # stuck at 0), and step == 0 would raise ZeroDivisionError on 0**-0.5.
        # For warmup_steps >= 1 the clamps are no-ops.
        reference_step = max(self.warmup_steps, 1)
        decay_factor = lr * reference_step**0.5
        return decay_factor * max(step, 1)**-0.5

    def get_lr(self) -> tp.List[float]:
        """Return the scheduled learning rate for every base learning rate.

        The schedule is evaluated at ``self._step_count``, which is maintained
        by the parent scheduler class.
        """
        return [self._get_sched_lr(base_lr, self._step_count) for base_lr in self.base_lrs]
|