Spaces:
Running
Running
Optimizers
#2
by
s3nh
- opened
https://github.com/MadsToftrup/Apollo-dev
Unofficial implementation of APOLLO
"APOLLO: SGD-like Memory, AdamW-level Performance"
from galore_torch import APOLLO
# Build two optimizer parameter groups. `non_apollo_params` and
# `apollo_params` must be defined by the caller before this point.
# The first group passes only its parameters; the second group also carries
# per-group settings (rank, projection, scaling, update gap) alongside them.
param_groups = [
    {'params': non_apollo_params},
    {
        'params': apollo_params,
        'rank': 1,
        'proj': 'random',
        'scale_type': 'tensor',
        'scale': 128,
        'update_proj_gap': 200,
        'proj_type': 'std',
    },
]

# Both groups share the same learning rate, passed at the optimizer level.
optimizer = APOLLO(param_groups, lr=0.01)