Optimizers

#2
by s3nh - opened
Smol Community org

https://github.com/MadsToftrup/Apollo-dev

Unofficial implementation of APOLLO
"APOLLO: SGD-like Memory, AdamW-level Performance"

from galore_torch import APOLLO
# NOTE: `apollo_params` and `non_apollo_params` are not defined in this
# snippet; split the model's parameters into these two collections first.
param_groups = [
    # Group that carries no APOLLO-specific keys.
    {'params': non_apollo_params},
    # Group that carries the APOLLO-specific options.
    # NOTE(review): the meaning of each option is defined by the APOLLO
    # implementation being imported — confirm the values against its docs.
    {
        'params': apollo_params,
        'rank': 1,
        'proj': 'random',
        'scale_type': 'tensor',
        'scale': 128,
        'update_proj_gap': 200,
        'proj_type': 'std',
    },
]
optimizer = APOLLO(param_groups, lr=0.01)

Sign up or log in to comment