diff --git "a/logs/imagenet_1_gimel_0.out" "b/logs/imagenet_1_gimel_0.out" new file mode 100644--- /dev/null +++ "b/logs/imagenet_1_gimel_0.out" @@ -0,0 +1,2067 @@ +Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, subsample=0.01, resume='', save_prefix='imagenet_1', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) +Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, subsample=0.01, resume='', save_prefix='imagenet_1', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) +Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, subsample=0.01, resume='', save_prefix='imagenet_1', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) +Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, subsample=0.01, resume='', save_prefix='imagenet_1', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) +Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, subsample=0.01, resume='', save_prefix='imagenet_1', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) +Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, subsample=0.01, resume='', save_prefix='imagenet_1', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) +Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, subsample=0.01, resume='', save_prefix='imagenet_1', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) +Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, subsample=0.01, resume='', save_prefix='imagenet_1', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1) +model: + base_learning_rate: 4.5e-06 + params: + ddconfig: + attn_resolutions: + - 16 + ch: 128 + ch_mult: + - 1 + - 1 + - 2 + - 2 + - 4 + double_z: false + dropout: 0.0 + in_channels: 3 + num_res_blocks: 2 + out_ch: 3 + resolution: 256 + z_channels: 256 + embed_dim: 256 + lossconfig: + params: + codebook_weight: 1.0 + disc_conditional: false + disc_in_channels: 3 + disc_num_layers: 2 + disc_start: 0 + disc_weight: 0.75 + target: vqloss.VQLPIPSWithDiscriminator + monitor: val/rec_loss + n_embed: 16384 + target: vqmodel.VQModel + +Working with z of shape (1, 256, 16, 16) = 65536 dimensions. +loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth +VQLPIPSWithDiscriminator running with hinge loss. +Loaded VQ encoder. +Data loaded: dataset contains 12811 images, and takes 51 training iterations per epoch. +Number of parameters: 750659840 +Running on 8 GPUs total +=> no checkpoint loaded, will train from scratch +/scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. + warnings.warn(warning.format(ret)) +/scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. + warnings.warn(warning.format(ret)) +/scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. + warnings.warn(warning.format(ret)) +/scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. + warnings.warn(warning.format(ret)) +/scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. + warnings.warn(warning.format(ret)) +/scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. + warnings.warn(warning.format(ret)) +/scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. + warnings.warn(warning.format(ret)) +/scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead. + warnings.warn(warning.format(ret)) +Epoch: 0 | Training loss: 7.043546527039771 | Elapsed time: 49.193817377090454 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 1 | Training loss: 6.766808285432703 | Elapsed time: 45.96229410171509 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 2 | Training loss: 6.7277149593128875 | Elapsed time: 46.14883041381836 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 3 | Training loss: 6.7078076343910364 | Elapsed time: 45.933130741119385 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 4 | Training loss: 6.6812375199561025 | Elapsed time: 45.77527356147766 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 5 | Training loss: 6.67377690707936 | Elapsed time: 45.864094734191895 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 6 | Training loss: 6.658007051430497 | Elapsed time: 45.988351821899414 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 7 | Training loss: 6.615950864904067 | Elapsed time: 45.774882078170776 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 8 | Training loss: 6.600029131945441 | Elapsed time: 45.80882525444031 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 9 | Training loss: 6.592601000093946 | Elapsed time: 45.84931421279907 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 10 | Training loss: 6.590933865191889 | Elapsed time: 45.8228657245636 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 11 | Training loss: 6.568142750683953 | Elapsed time: 45.92162799835205 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 12 | Training loss: 6.516884728973987 | Elapsed time: 45.73153877258301 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 13 | Training loss: 6.501673726474538 | Elapsed time: 45.835033893585205 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 14 | Training loss: 6.505044058257458 | Elapsed time: 45.953683853149414 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 15 | Training loss: 6.50286782021616 | Elapsed time: 45.89152216911316 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 16 | Training loss: 6.503354465260225 | Elapsed time: 45.77711772918701 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 17 | Training loss: 6.473606147018134 | Elapsed time: 45.82464575767517 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 18 | Training loss: 6.461213009030211 | Elapsed time: 45.83788180351257 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 19 | Training loss: 6.456397150077072 | Elapsed time: 45.82571887969971 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 20 | Training loss: 6.4360804931790225 | Elapsed time: 45.84931516647339 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 21 | Training loss: 6.441464405433805 | Elapsed time: 45.826802492141724 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 22 | Training loss: 6.428643497766233 | Elapsed time: 45.96351718902588 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 23 | Training loss: 6.42662462533689 | Elapsed time: 46.099257469177246 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 24 | Training loss: 6.400744569067862 | Elapsed time: 45.83628869056702 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 25 | Training loss: 6.397740392123952 | Elapsed time: 45.75833988189697 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 26 | Training loss: 6.382185440437467 | Elapsed time: 45.82108950614929 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 27 | Training loss: 6.350544826657164 | Elapsed time: 45.96693181991577 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 28 | Training loss: 6.346963985293519 | Elapsed time: 45.8038649559021 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 29 | Training loss: 6.343194980247348 | Elapsed time: 45.77959585189819 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 30 | Training loss: 6.28940362556308 | Elapsed time: 45.76897430419922 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 31 | Training loss: 6.294970213198194 | Elapsed time: 45.79410696029663 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 32 | Training loss: 6.27657787472594 | Elapsed time: 45.79617094993591 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 33 | Training loss: 6.295723213868983 | Elapsed time: 45.89515805244446 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 34 | Training loss: 6.276504460503073 | Elapsed time: 45.80754327774048 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 35 | Training loss: 6.24951296226651 | Elapsed time: 45.83139681816101 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 36 | Training loss: 6.2277555839688175 | Elapsed time: 45.820045709609985 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 37 | Training loss: 6.247811560537301 | Elapsed time: 45.73702096939087 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 38 | Training loss: 6.2054581081165985 | Elapsed time: 45.76875066757202 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_038_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 39 | Training loss: 6.204620034086938 | Elapsed time: 45.82428002357483 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_039_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 40 | Training loss: 6.19763027452955 | Elapsed time: 45.79085445404053 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_040_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 41 | Training loss: 6.186872248556099 | Elapsed time: 45.759870767593384 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_041_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 42 | Training loss: 6.176399595597211 | Elapsed time: 45.90772318840027 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_042_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 43 | Training loss: 6.164415761536243 | Elapsed time: 45.79914569854736 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_043_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 44 | Training loss: 6.156171013327206 | Elapsed time: 45.783682107925415 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_044_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 45 | Training loss: 6.136411049786736 | Elapsed time: 45.7662353515625 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_045_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 46 | Training loss: 6.145836213055779 | Elapsed time: 46.00381064414978 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_046_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 47 | Training loss: 6.117194213119208 | Elapsed time: 45.8336284160614 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_047_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 48 | Training loss: 6.120359916313022 | Elapsed time: 45.8475456237793 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_048_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 49 | Training loss: 6.090019730960622 | Elapsed time: 45.91952085494995 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_049_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 50 | Training loss: 6.103079870635388 | Elapsed time: 45.82374167442322 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_050_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 51 | Training loss: 6.099720646353329 | Elapsed time: 45.8146276473999 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_051_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 52 | Training loss: 6.078967524509804 | Elapsed time: 45.84751915931702 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_052_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 53 | Training loss: 6.093345492493873 | Elapsed time: 45.81482267379761 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_053_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 54 | Training loss: 6.067162466984169 | Elapsed time: 45.814218044281006 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_054_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 55 | Training loss: 6.05002846437342 | Elapsed time: 45.77687382698059 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_055_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 56 | Training loss: 6.085603124955121 | Elapsed time: 45.85694336891174 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_056_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 57 | Training loss: 6.040667290781059 | Elapsed time: 45.850117206573486 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_057_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 58 | Training loss: 6.053070760240741 | Elapsed time: 45.855679512023926 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_058_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 59 | Training loss: 6.038635581147437 | Elapsed time: 46.196725368499756 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_059_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 60 | Training loss: 6.0327114217421585 | Elapsed time: 45.87923574447632 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_060_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 61 | Training loss: 6.013049396814084 | Elapsed time: 45.840707778930664 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_061_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 62 | Training loss: 6.008094909144383 | Elapsed time: 45.9237003326416 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_062_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 63 | Training loss: 6.001737875096938 | Elapsed time: 45.93108773231506 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_063_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 64 | Training loss: 6.007056582207773 | Elapsed time: 45.847880125045776 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_064_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 65 | Training loss: 6.002021284664378 | Elapsed time: 45.81057333946228 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_065_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 66 | Training loss: 5.973114107169357 | Elapsed time: 45.96873211860657 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_066_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 67 | Training loss: 5.980071273504519 | Elapsed time: 45.72526502609253 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_067_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 68 | Training loss: 5.954887249890496 | Elapsed time: 45.765860080718994 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_068_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 69 | Training loss: 5.973253296870811 | Elapsed time: 45.73172736167908 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_069_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 70 | Training loss: 5.976043280433206 | Elapsed time: 45.77319359779358 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_070_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 71 | Training loss: 5.957718811783136 | Elapsed time: 45.764458656311035 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_071_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 72 | Training loss: 5.950902733148313 | Elapsed time: 45.82774066925049 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_072_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 73 | Training loss: 5.933941579332538 | Elapsed time: 45.91219735145569 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_073_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 74 | Training loss: 5.934334437052409 | Elapsed time: 45.90277457237244 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_074_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 75 | Training loss: 5.938446157118854 | Elapsed time: 45.79429340362549 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_075_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 76 | Training loss: 5.9110984989241055 | Elapsed time: 45.86379647254944 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_076_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 77 | Training loss: 5.916216934428496 | Elapsed time: 45.835638999938965 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_077_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 78 | Training loss: 5.8931853163476084 | Elapsed time: 45.88514256477356 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_078_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 79 | Training loss: 5.91504778581507 | Elapsed time: 45.85122728347778 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_079_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 80 | Training loss: 5.900861272624895 | Elapsed time: 45.9109148979187 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_080_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 81 | Training loss: 5.914509025274539 | Elapsed time: 45.88674068450928 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_081_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 82 | Training loss: 5.883009845135259 | Elapsed time: 45.80745792388916 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_082_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 83 | Training loss: 5.870837183559642 | Elapsed time: 45.78189659118652 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_083_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 84 | Training loss: 5.857988899829341 | Elapsed time: 45.92634582519531 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_084_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 85 | Training loss: 5.86151720495785 | Elapsed time: 45.87733244895935 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_085_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 86 | Training loss: 5.855651874168246 | Elapsed time: 45.8209707736969 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_086_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 87 | Training loss: 5.870900677699669 | Elapsed time: 45.85345458984375 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_087_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 88 | Training loss: 5.844179415235333 | Elapsed time: 45.91537070274353 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_088_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 89 | Training loss: 5.852461319343717 | Elapsed time: 45.810298681259155 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_089_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 90 | Training loss: 5.842968865936878 | Elapsed time: 45.83434748649597 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_090_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 91 | Training loss: 5.808148327995749 | Elapsed time: 45.91014909744263 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_091_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 92 | Training loss: 5.81377062143064 | Elapsed time: 45.75301122665405 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_092_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 93 | Training loss: 5.80474463631125 | Elapsed time: 45.8694052696228 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_093_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 94 | Training loss: 5.808050445481842 | Elapsed time: 45.92968440055847 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_094_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 95 | Training loss: 5.779785838781619 | Elapsed time: 45.79834818840027 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_095_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 96 | Training loss: 5.821337017358518 | Elapsed time: 45.89797568321228 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_096_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 97 | Training loss: 5.791053360583735 | Elapsed time: 45.80914282798767 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_097_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 98 | Training loss: 5.766161357655244 | Elapsed time: 45.777507305145264 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_098_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 99 | Training loss: 5.7779078857571475 | Elapsed time: 45.7093288898468 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_099_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 100 | Training loss: 5.7458407177644615 | Elapsed time: 45.818432092666626 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_100_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 101 | Training loss: 5.756022238263897 | Elapsed time: 45.834561586380005 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_101_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 102 | Training loss: 5.744619107713886 | Elapsed time: 45.75854730606079 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_102_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 103 | Training loss: 5.74448143267164 | Elapsed time: 45.78126859664917 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_103_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 104 | Training loss: 5.741693029216692 | Elapsed time: 45.68103051185608 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_104_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 105 | Training loss: 5.718536881839528 | Elapsed time: 45.72249412536621 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_105_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 106 | Training loss: 5.71733944088805 | Elapsed time: 45.91015815734863 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_106_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 107 | Training loss: 5.707048659231148 | Elapsed time: 46.057250022888184 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_107_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 108 | Training loss: 5.667762363658232 | Elapsed time: 45.967145681381226 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_108_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 109 | Training loss: 5.660145853080001 | Elapsed time: 45.90626072883606 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_109_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 110 | Training loss: 5.6487691355686565 | Elapsed time: 45.775020599365234 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_110_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 111 | Training loss: 5.6561916669209795 | Elapsed time: 45.734315156936646 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_111_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 112 | Training loss: 5.644394612779804 | Elapsed time: 45.806397438049316 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_112_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 113 | Training loss: 5.618572403402889 | Elapsed time: 45.87484288215637 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_113_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 114 | Training loss: 5.611732613806631 | Elapsed time: 45.8614821434021 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_114_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 115 | Training loss: 5.596433265536439 | Elapsed time: 45.7845504283905 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_115_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 116 | Training loss: 5.562562961204379 | Elapsed time: 45.77793574333191 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_116_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 117 | Training loss: 5.5593670489741305 | Elapsed time: 45.92505168914795 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_117_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 118 | Training loss: 5.5695345635507625 | Elapsed time: 45.747283697128296 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_118_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 119 | Training loss: 5.526777164608824 | Elapsed time: 45.84762406349182 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_119_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 120 | Training loss: 5.550149263120165 | Elapsed time: 45.82103157043457 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_120_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 121 | Training loss: 5.54259381574743 | Elapsed time: 45.84182643890381 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_121_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 122 | Training loss: 5.505039981767243 | Elapsed time: 45.90591907501221 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_122_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 123 | Training loss: 5.504643281300862 | Elapsed time: 46.05148243904114 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_123_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 124 | Training loss: 5.482269782646029 | Elapsed time: 45.78461837768555 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_124_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 125 | Training loss: 5.483937712276683 | Elapsed time: 45.81445789337158 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_125_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 126 | Training loss: 5.455186086542466 | Elapsed time: 45.9881477355957 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_126_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 127 | Training loss: 5.439026505339379 | Elapsed time: 45.76537275314331 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_127_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 128 | Training loss: 5.426474430981805 | Elapsed time: 45.84377074241638 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_128_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 129 | Training loss: 5.441283291461421 | Elapsed time: 45.71045708656311 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_129_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 130 | Training loss: 5.393452476052677 | Elapsed time: 45.957597970962524 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_130_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 131 | Training loss: 5.398462678871903 | Elapsed time: 45.80257558822632 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_131_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 132 | Training loss: 5.384348785175996 | Elapsed time: 45.747392416000366 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_132_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 133 | Training loss: 5.362966294382133 | Elapsed time: 45.924644947052 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_133_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 134 | Training loss: 5.313198332693062 | Elapsed time: 45.8295202255249 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_134_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 135 | Training loss: 5.329458479787789 | Elapsed time: 45.850850343704224 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_135_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 136 | Training loss: 5.344707526412665 | Elapsed time: 45.75453019142151 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_136_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 137 | Training loss: 5.3113769643446975 | Elapsed time: 45.78729844093323 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_137_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 138 | Training loss: 5.284378865185906 | Elapsed time: 45.81169605255127 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_138_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 139 | Training loss: 5.304612515019436 | Elapsed time: 45.742329359054565 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_139_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 140 | Training loss: 5.270312673905316 | Elapsed time: 45.75006365776062 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_140_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 141 | Training loss: 5.274997309142468 | Elapsed time: 45.8971631526947 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_141_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 142 | Training loss: 5.2285739580790205 | Elapsed time: 45.79220795631409 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_142_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 143 | Training loss: 5.2100929091958434 | Elapsed time: 45.80806827545166 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_143_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 144 | Training loss: 5.2202148530997485 | Elapsed time: 45.78081679344177 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_144_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 145 | Training loss: 5.19704174527935 | Elapsed time: 45.741596937179565 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_145_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 146 | Training loss: 5.173148903192258 | Elapsed time: 45.751710653305054 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_146_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 147 | Training loss: 5.1759005527870325 | Elapsed time: 45.75114870071411 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_147_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 148 | Training loss: 5.156887895920697 | Elapsed time: 45.799901723861694 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_148_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 149 | Training loss: 5.177238987941368 | Elapsed time: 45.7511043548584 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_149_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 150 | Training loss: 5.148730418261359 | Elapsed time: 45.734413862228394 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_150_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 151 | Training loss: 5.136284052156935 | Elapsed time: 45.82698345184326 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_151_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 152 | Training loss: 5.102546149609136 | Elapsed time: 45.77281069755554 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_152_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 153 | Training loss: 5.1002951883802226 | Elapsed time: 45.791810750961304 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_153_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 154 | Training loss: 5.130586418451047 | Elapsed time: 45.74246263504028 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_154_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 155 | Training loss: 5.102615010504629 | Elapsed time: 45.76729488372803 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_155_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 156 | Training loss: 5.017488114974078 | Elapsed time: 45.84599995613098 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_156_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 157 | Training loss: 5.011446672327378 | Elapsed time: 45.751790285110474 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_157_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 158 | Training loss: 5.044695255803127 | Elapsed time: 45.711729288101196 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_158_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 159 | Training loss: 5.043585356544046 | Elapsed time: 45.80864238739014 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_159_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 160 | Training loss: 5.043942002689137 | Elapsed time: 45.69001770019531 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_160_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 161 | Training loss: 4.986280431934431 | Elapsed time: 45.816362380981445 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_161_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 162 | Training loss: 5.006250718060662 | Elapsed time: 45.86846208572388 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_162_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 163 | Training loss: 4.98032782124538 | Elapsed time: 45.739298820495605 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_163_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 164 | Training loss: 4.956909151638255 | Elapsed time: 45.764023780822754 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_164_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 165 | Training loss: 4.983159027847589 | Elapsed time: 45.87623715400696 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_165_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 166 | Training loss: 4.971370117337096 | Elapsed time: 45.748239040374756 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_166_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 167 | Training loss: 4.964275771496343 | Elapsed time: 45.75476932525635 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_167_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 168 | Training loss: 4.919794307035558 | Elapsed time: 45.74912643432617 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_168_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 169 | Training loss: 4.888849492166557 | Elapsed time: 45.772761821746826 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_169_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 170 | Training loss: 4.929207437178668 | Elapsed time: 45.7736656665802 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_170_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 171 | Training loss: 4.903938695496204 | Elapsed time: 45.806894063949585 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_171_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 172 | Training loss: 4.912430772594377 | Elapsed time: 45.7766273021698 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_172_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 173 | Training loss: 4.875202618393243 | Elapsed time: 45.75883746147156 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_173_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 174 | Training loss: 4.887735301373052 | Elapsed time: 45.73487448692322 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_174_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 175 | Training loss: 4.865130134657318 | Elapsed time: 45.75269269943237 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_175_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 176 | Training loss: 4.842269289727304 | Elapsed time: 45.78662943840027 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_176_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 177 | Training loss: 4.82957805371752 | Elapsed time: 45.76472043991089 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_177_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 178 | Training loss: 4.811760229222915 | Elapsed time: 45.75517654418945 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_178_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 179 | Training loss: 4.836708807477764 | Elapsed time: 45.77725148200989 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_179_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 180 | Training loss: 4.819212436676025 | Elapsed time: 45.91866397857666 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_180_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 181 | Training loss: 4.818228356978473 | Elapsed time: 45.80518388748169 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_181_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 182 | Training loss: 4.7841367347567685 | Elapsed time: 45.764925956726074 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_182_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 183 | Training loss: 4.770917518466127 | Elapsed time: 45.76788902282715 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_183_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 184 | Training loss: 4.780682769476199 | Elapsed time: 45.82425236701965 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_184_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 185 | Training loss: 4.7824880843069035 | Elapsed time: 45.82774043083191 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_185_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 186 | Training loss: 4.75738458072438 | Elapsed time: 45.83792591094971 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_186_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 187 | Training loss: 4.709744191637226 | Elapsed time: 45.93032932281494 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_187_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 188 | Training loss: 4.731794413398294 | Elapsed time: 45.743499517440796 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_188_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 189 | Training loss: 4.7199934884613635 | Elapsed time: 45.73766350746155 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_189_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 190 | Training loss: 4.701465634738698 | Elapsed time: 45.85090613365173 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_190_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 191 | Training loss: 4.7412017373477715 | Elapsed time: 45.801029920578 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_191_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 192 | Training loss: 4.698865974650664 | Elapsed time: 45.804718017578125 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_192_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 193 | Training loss: 4.729171575284472 | Elapsed time: 45.78995990753174 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_193_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 194 | Training loss: 4.647403791839001 | Elapsed time: 45.7976770401001 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_194_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 195 | Training loss: 4.709497479831471 | Elapsed time: 45.7253623008728 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_195_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 196 | Training loss: 4.648949828802371 | Elapsed time: 45.78498458862305 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_196_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 197 | Training loss: 4.6949026350881535 | Elapsed time: 45.758655309677124 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_197_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 198 | Training loss: 4.646475072000541 | Elapsed time: 45.756237745285034 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_198_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 199 | Training loss: 4.634022684658275 | Elapsed time: 45.96502208709717 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_199_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 200 | Training loss: 4.632112465652765 | Elapsed time: 45.81623077392578 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_200_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 201 | Training loss: 4.621544454611984 | Elapsed time: 45.85079646110535 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_201_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 202 | Training loss: 4.644361252878227 | Elapsed time: 45.748879194259644 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_202_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 203 | Training loss: 4.616911486083386 | Elapsed time: 45.860562324523926 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_203_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 204 | Training loss: 4.611448830249262 | Elapsed time: 45.73089814186096 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_204_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 205 | Training loss: 4.586967996522492 | Elapsed time: 45.78876519203186 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_205_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 206 | Training loss: 4.586296586429372 | Elapsed time: 45.86703395843506 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_206_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 207 | Training loss: 4.592804824604707 | Elapsed time: 45.70067095756531 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_207_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 208 | Training loss: 4.583900189867206 | Elapsed time: 45.74430775642395 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_208_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 209 | Training loss: 4.558383231069527 | Elapsed time: 45.81425952911377 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_209_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 210 | Training loss: 4.577421880235859 | Elapsed time: 45.907869815826416 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_210_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 211 | Training loss: 4.553622984418682 | Elapsed time: 45.83412003517151 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_211_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 212 | Training loss: 4.572612986845129 | Elapsed time: 45.73381996154785 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_212_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 213 | Training loss: 4.586323046216778 | Elapsed time: 45.719831228256226 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_213_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 214 | Training loss: 4.540239951189826 | Elapsed time: 45.73864936828613 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_214_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 215 | Training loss: 4.545975974961823 | Elapsed time: 45.72970724105835 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_215_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 216 | Training loss: 4.520774663663378 | Elapsed time: 45.84546613693237 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_216_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 217 | Training loss: 4.523713037079456 | Elapsed time: 45.755959272384644 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_217_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 218 | Training loss: 4.517030725292131 | Elapsed time: 45.785149335861206 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_218_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 219 | Training loss: 4.5025408034231145 | Elapsed time: 45.926584243774414 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_219_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 220 | Training loss: 4.510585223927217 | Elapsed time: 45.778274059295654 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_220_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 221 | Training loss: 4.48927323958453 | Elapsed time: 45.75522518157959 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_221_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 222 | Training loss: 4.508942744311164 | Elapsed time: 45.81759762763977 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_222_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 223 | Training loss: 4.499260537764606 | Elapsed time: 45.859750747680664 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_223_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 224 | Training loss: 4.459998495438519 | Elapsed time: 45.721718072891235 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_224_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 225 | Training loss: 4.450929931565827 | Elapsed time: 45.763463735580444 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_225_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 226 | Training loss: 4.437629054574406 | Elapsed time: 45.74750375747681 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_226_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 227 | Training loss: 4.478469942130294 | Elapsed time: 45.81532025337219 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_227_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 228 | Training loss: 4.471122751048967 | Elapsed time: 45.91307973861694 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_228_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 229 | Training loss: 4.439887533000872 | Elapsed time: 45.83796525001526 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_229_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 230 | Training loss: 4.444347615335502 | Elapsed time: 45.76841473579407 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_230_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 231 | Training loss: 4.442479806787827 | Elapsed time: 45.749823808670044 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_231_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 232 | Training loss: 4.441741606768439 | Elapsed time: 45.8878710269928 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_232_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 233 | Training loss: 4.40933624903361 | Elapsed time: 45.78229308128357 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_233_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 234 | Training loss: 4.4087340551264145 | Elapsed time: 45.85774254798889 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_234_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 235 | Training loss: 4.366470580007515 | Elapsed time: 45.779969453811646 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_235_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 236 | Training loss: 4.40992314675275 | Elapsed time: 45.734832525253296 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_236_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 237 | Training loss: 4.399168098674101 | Elapsed time: 45.841670751571655 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_237_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 238 | Training loss: 4.401692862604179 | Elapsed time: 45.78200316429138 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_238_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 239 | Training loss: 4.386419698303821 | Elapsed time: 45.804877281188965 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_239_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 240 | Training loss: 4.36597565576142 | Elapsed time: 45.77377367019653 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_240_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 241 | Training loss: 4.364088619456572 | Elapsed time: 46.114524126052856 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_241_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 242 | Training loss: 4.366300274344051 | Elapsed time: 45.716119050979614 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_242_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 243 | Training loss: 4.356059710184733 | Elapsed time: 45.898865938186646 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_243_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 244 | Training loss: 4.362691149992101 | Elapsed time: 45.72869849205017 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_244_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 245 | Training loss: 4.345884912154254 | Elapsed time: 45.78029680252075 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_245_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 246 | Training loss: 4.355045384051753 | Elapsed time: 45.8501615524292 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_246_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 247 | Training loss: 4.323636354184618 | Elapsed time: 45.897467374801636 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_247_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 248 | Training loss: 4.340930859247844 | Elapsed time: 45.83033633232117 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_248_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 249 | Training loss: 4.332936810512169 | Elapsed time: 45.69551372528076 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_249_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 250 | Training loss: 4.329319149840112 | Elapsed time: 45.90491342544556 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_250_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 251 | Training loss: 4.326518362643672 | Elapsed time: 45.81706166267395 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_251_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 252 | Training loss: 4.328127253289316 | Elapsed time: 45.79682493209839 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_252_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 253 | Training loss: 4.3200018452663045 | Elapsed time: 45.76698184013367 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_253_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 254 | Training loss: 4.30009443619672 | Elapsed time: 45.7466516494751 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_254_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 255 | Training loss: 4.32702132767322 | Elapsed time: 45.93168902397156 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_255_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 256 | Training loss: 4.318385638442694 | Elapsed time: 45.84888577461243 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_256_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 257 | Training loss: 4.283635723824594 | Elapsed time: 45.68606376647949 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_257_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 258 | Training loss: 4.287235007566564 | Elapsed time: 45.796438694000244 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_258_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 259 | Training loss: 4.291142042945413 | Elapsed time: 45.826446771621704 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_259_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 260 | Training loss: 4.291378287708058 | Elapsed time: 45.72973394393921 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_260_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 261 | Training loss: 4.2802208404915 | Elapsed time: 45.81098556518555 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_261_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 262 | Training loss: 4.255209679697074 | Elapsed time: 45.787789821624756 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_262_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 263 | Training loss: 4.2665513450024175 | Elapsed time: 45.82381319999695 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_263_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 264 | Training loss: 4.271441973891913 | Elapsed time: 45.88394546508789 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_264_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 265 | Training loss: 4.250896785773483 | Elapsed time: 45.837327003479004 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_265_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 266 | Training loss: 4.236474158717137 | Elapsed time: 45.67136311531067 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_266_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 267 | Training loss: 4.261651983448103 | Elapsed time: 45.80492830276489 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_267_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 268 | Training loss: 4.262852098427567 | Elapsed time: 45.793686628341675 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_268_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 269 | Training loss: 4.254002169066784 | Elapsed time: 45.75695252418518 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_269_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 270 | Training loss: 4.2305574043124325 | Elapsed time: 45.81221318244934 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_270_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 271 | Training loss: 4.245378372715969 | Elapsed time: 45.75695180892944 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_271_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 272 | Training loss: 4.2464976871714875 | Elapsed time: 45.68888545036316 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_272_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 273 | Training loss: 4.271517949945786 | Elapsed time: 45.89753794670105 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_273_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 274 | Training loss: 4.230192305994969 | Elapsed time: 45.75472331047058 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_274_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 275 | Training loss: 4.218163312650194 | Elapsed time: 45.742302894592285 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_275_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 276 | Training loss: 4.204744661555571 | Elapsed time: 45.76910638809204 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_276_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 277 | Training loss: 4.205704726424872 | Elapsed time: 45.69619870185852 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_277_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 278 | Training loss: 4.195005183126412 | Elapsed time: 45.73953175544739 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_278_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 279 | Training loss: 4.201608195024378 | Elapsed time: 45.783344984054565 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_279_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 280 | Training loss: 4.195655892877018 | Elapsed time: 45.8114960193634 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_280_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 281 | Training loss: 4.18010698112787 | Elapsed time: 46.116570472717285 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_281_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 282 | Training loss: 4.208772874345966 | Elapsed time: 45.81240391731262 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_282_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 283 | Training loss: 4.158667989805633 | Elapsed time: 45.78012800216675 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_283_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 284 | Training loss: 4.155715956407435 | Elapsed time: 45.70797300338745 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_284_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 285 | Training loss: 4.19122621124866 | Elapsed time: 45.85955238342285 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_285_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 286 | Training loss: 4.181657024458343 | Elapsed time: 45.9048957824707 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_286_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 287 | Training loss: 4.173296774134917 | Elapsed time: 45.90652275085449 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_287_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 288 | Training loss: 4.1655091117410095 | Elapsed time: 45.76054763793945 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_288_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 289 | Training loss: 4.167976977778416 | Elapsed time: 45.72291946411133 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_289_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 290 | Training loss: 4.149198934143665 | Elapsed time: 45.73708915710449 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_290_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 291 | Training loss: 4.1540939387153175 | Elapsed time: 45.81145882606506 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_291_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 292 | Training loss: 4.168963446336634 | Elapsed time: 45.83398151397705 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_292_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 293 | Training loss: 4.13707358229394 | Elapsed time: 45.85177135467529 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_293_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 294 | Training loss: 4.13791778975842 | Elapsed time: 45.806588888168335 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_294_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 295 | Training loss: 4.150983015696208 | Elapsed time: 46.12140393257141 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_295_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 296 | Training loss: 4.152443829704733 | Elapsed time: 46.006781816482544 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_296_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 297 | Training loss: 4.140699751236859 | Elapsed time: 45.80596995353699 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_297_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 298 | Training loss: 4.113497074912576 | Elapsed time: 45.744346380233765 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_298_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 299 | Training loss: 4.14106962260078 | Elapsed time: 45.85153651237488 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_299_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 300 | Training loss: 4.0912254744885015 | Elapsed time: 45.86111855506897 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_300_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 301 | Training loss: 4.105710969251745 | Elapsed time: 46.15726041793823 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_301_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 302 | Training loss: 4.134032791736079 | Elapsed time: 45.851938247680664 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_302_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 303 | Training loss: 4.101124782188266 | Elapsed time: 46.084498167037964 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_303_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 304 | Training loss: 4.079764543795118 | Elapsed time: 45.69289016723633 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_304_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 305 | Training loss: 4.104470552182665 | Elapsed time: 45.69515371322632 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_305_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 306 | Training loss: 4.083122487161674 | Elapsed time: 45.78936433792114 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_306_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 307 | Training loss: 4.115095362943761 | Elapsed time: 45.794105768203735 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_307_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 308 | Training loss: 4.096024153279323 | Elapsed time: 45.705485820770264 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_308_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 309 | Training loss: 4.103306597354365 | Elapsed time: 45.68509650230408 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_309_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 310 | Training loss: 4.093474112304986 | Elapsed time: 45.92891597747803 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_310_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 311 | Training loss: 4.081493807773964 | Elapsed time: 45.780004024505615 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_311_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 312 | Training loss: 4.075444156048345 | Elapsed time: 45.976975440979004 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_312_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 313 | Training loss: 4.075511324639414 | Elapsed time: 45.748451948165894 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_313_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 314 | Training loss: 4.068175965664434 | Elapsed time: 45.7329797744751 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_314_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 315 | Training loss: 4.055799559050915 | Elapsed time: 45.69804787635803 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_315_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 316 | Training loss: 4.049119654823752 | Elapsed time: 45.77945375442505 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_316_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 317 | Training loss: 4.083451668421428 | Elapsed time: 45.751957416534424 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_317_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 318 | Training loss: 4.033344582015393 | Elapsed time: 45.83178949356079 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_318_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 319 | Training loss: 4.067084915497723 | Elapsed time: 45.81037926673889 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_319_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 320 | Training loss: 4.04551118962905 | Elapsed time: 45.74399447441101 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_320_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 321 | Training loss: 4.0350420662001065 | Elapsed time: 45.922539949417114 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_321_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 322 | Training loss: 4.01402564609752 | Elapsed time: 45.86149764060974 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_322_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 323 | Training loss: 4.049276492174934 | Elapsed time: 45.75879120826721 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_323_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 324 | Training loss: 4.0065331552542895 | Elapsed time: 45.81314015388489 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_324_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 325 | Training loss: 4.043389273624794 | Elapsed time: 45.714417934417725 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_325_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 326 | Training loss: 4.03416376955369 | Elapsed time: 45.82090425491333 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_326_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 327 | Training loss: 4.037468414680631 | Elapsed time: 45.85242700576782 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_327_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 328 | Training loss: 4.007841858209348 | Elapsed time: 45.801703453063965 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_328_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 329 | Training loss: 3.9982581699595734 | Elapsed time: 45.78855085372925 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_329_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 330 | Training loss: 4.00534195058486 | Elapsed time: 45.807350158691406 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_330_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 331 | Training loss: 4.020159501655429 | Elapsed time: 45.84674644470215 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_331_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 332 | Training loss: 4.016251213410321 | Elapsed time: 45.768020153045654 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_332_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 333 | Training loss: 4.032099424623976 | Elapsed time: 45.76662302017212 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_333_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 334 | Training loss: 4.01971108305688 | Elapsed time: 45.80872869491577 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_334_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 335 | Training loss: 4.005303209903193 | Elapsed time: 45.882874488830566 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_335_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 336 | Training loss: 3.9871019896338966 | Elapsed time: 45.813493490219116 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_336_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 337 | Training loss: 3.9960988596373914 | Elapsed time: 45.78709959983826 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_337_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 338 | Training loss: 3.988836886836033 | Elapsed time: 45.71905064582825 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_338_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 339 | Training loss: 3.983921294118844 | Elapsed time: 45.76715135574341 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_339_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 340 | Training loss: 3.9880628352071725 | Elapsed time: 45.7093620300293 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_340_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 341 | Training loss: 3.9547264108470843 | Elapsed time: 45.72784614562988 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_341_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 342 | Training loss: 3.962370222690059 | Elapsed time: 45.78838920593262 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_342_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 343 | Training loss: 3.9593217840381696 | Elapsed time: 45.78308367729187 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_343_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 344 | Training loss: 3.953524435267729 | Elapsed time: 45.81917881965637 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_344_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 345 | Training loss: 3.979203551423316 | Elapsed time: 45.712756872177124 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_345_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 346 | Training loss: 3.9718121126586317 | Elapsed time: 45.713664531707764 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_346_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 347 | Training loss: 3.958053761837529 | Elapsed time: 45.75751328468323 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_347_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 348 | Training loss: 3.939714183994368 | Elapsed time: 45.796645164489746 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_348_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 349 | Training loss: 3.9520174709020877 | Elapsed time: 45.739367961883545 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_349_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 350 | Training loss: 3.9666887545118144 | Elapsed time: 45.76844596862793 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_350_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 351 | Training loss: 3.9439862802916883 | Elapsed time: 45.73711657524109 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_351_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 352 | Training loss: 3.928974469502767 | Elapsed time: 45.781933546066284 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_352_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 353 | Training loss: 3.946581812465892 | Elapsed time: 45.99103569984436 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_353_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 354 | Training loss: 3.9443052376017853 | Elapsed time: 45.83690166473389 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_354_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 355 | Training loss: 3.931907144247317 | Elapsed time: 45.83392024040222 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_355_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 356 | Training loss: 3.916507968715593 | Elapsed time: 45.781365156173706 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_356_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 357 | Training loss: 3.936722390791949 | Elapsed time: 45.76520895957947 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_357_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 358 | Training loss: 3.9499794268140604 | Elapsed time: 45.76922154426575 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_358_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 359 | Training loss: 3.928117915695789 | Elapsed time: 45.7315878868103 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_359_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 360 | Training loss: 3.922170269723032 | Elapsed time: 45.84778714179993 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_360_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 361 | Training loss: 3.9393967880922207 | Elapsed time: 45.77668833732605 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_361_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 362 | Training loss: 3.9055882528716444 | Elapsed time: 45.85438275337219 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_362_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 363 | Training loss: 3.9041427584255444 | Elapsed time: 45.92664623260498 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_363_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 364 | Training loss: 3.9198923110961914 | Elapsed time: 45.843753814697266 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_364_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 365 | Training loss: 3.9194945587831387 | Elapsed time: 45.823097705841064 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_365_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 366 | Training loss: 3.9005069358676088 | Elapsed time: 45.737046241760254 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_366_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 367 | Training loss: 3.9175963542040657 | Elapsed time: 45.75269389152527 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_367_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 368 | Training loss: 3.9338943677790024 | Elapsed time: 45.840890407562256 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_368_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 369 | Training loss: 3.919556930953381 | Elapsed time: 45.86128306388855 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_369_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 370 | Training loss: 3.908453950694963 | Elapsed time: 45.869569301605225 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_370_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 371 | Training loss: 3.9174689919340846 | Elapsed time: 45.84985303878784 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_371_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 372 | Training loss: 3.888943401037478 | Elapsed time: 45.76687431335449 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_372_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 373 | Training loss: 3.8911954187879374 | Elapsed time: 45.78665804862976 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_373_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 374 | Training loss: 3.902667391533945 | Elapsed time: 45.712159872055054 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_374_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 375 | Training loss: 3.896462136623906 | Elapsed time: 45.71942639350891 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_375_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 376 | Training loss: 3.8723799621357635 | Elapsed time: 45.73902106285095 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_376_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 377 | Training loss: 3.8912426957897113 | Elapsed time: 45.883963108062744 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_377_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 378 | Training loss: 3.8739771001479206 | Elapsed time: 45.93844985961914 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_378_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 379 | Training loss: 3.8920229837006213 | Elapsed time: 45.72770833969116 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_379_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 380 | Training loss: 3.8773061761669085 | Elapsed time: 45.73350381851196 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_380_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 381 | Training loss: 3.878425822538488 | Elapsed time: 45.88092279434204 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_381_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 382 | Training loss: 3.8983326939975513 | Elapsed time: 45.768494606018066 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_382_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 383 | Training loss: 3.846861993565279 | Elapsed time: 45.77775454521179 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_383_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 384 | Training loss: 3.8405342242297005 | Elapsed time: 45.7558069229126 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_384_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 385 | Training loss: 3.843676627851 | Elapsed time: 45.73359227180481 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_385_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 386 | Training loss: 3.8742800273147284 | Elapsed time: 45.73901128768921 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_386_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 387 | Training loss: 3.8377365785486557 | Elapsed time: 45.81339645385742 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_387_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 388 | Training loss: 3.862943728764852 | Elapsed time: 45.75957155227661 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_388_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 389 | Training loss: 3.8759475268569648 | Elapsed time: 45.690025806427 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_389_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 390 | Training loss: 3.870737529268452 | Elapsed time: 45.86559295654297 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_390_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 391 | Training loss: 3.838643882788864 | Elapsed time: 45.79465293884277 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_391_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 392 | Training loss: 3.851317704892626 | Elapsed time: 45.73580718040466 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_392_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 393 | Training loss: 3.847694008958106 | Elapsed time: 45.74326801300049 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_393_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 394 | Training loss: 3.8514280646455052 | Elapsed time: 45.747926473617554 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_394_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 395 | Training loss: 3.8300009381537343 | Elapsed time: 45.79491209983826 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_395_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 396 | Training loss: 3.8307353234758565 | Elapsed time: 45.73918700218201 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_396_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 397 | Training loss: 3.8397992405236936 | Elapsed time: 45.75033450126648 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_397_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 398 | Training loss: 3.8245228832843257 | Elapsed time: 45.82746744155884 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_398_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 399 | Training loss: 3.816496680764591 | Elapsed time: 45.97912120819092 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_399_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 400 | Training loss: 3.8396658429912494 | Elapsed time: 45.86027407646179 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_400_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 401 | Training loss: 3.8260555875067617 | Elapsed time: 45.787028074264526 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_401_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 402 | Training loss: 3.802743911743164 | Elapsed time: 46.080081939697266 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_402_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 403 | Training loss: 3.8274108662324795 | Elapsed time: 45.80333065986633 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_403_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 404 | Training loss: 3.8292576191472074 | Elapsed time: 45.7592556476593 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_404_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 405 | Training loss: 3.8231166437560438 | Elapsed time: 45.72156286239624 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_405_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 406 | Training loss: 3.8026165915470496 | Elapsed time: 45.79913783073425 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_406_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 407 | Training loss: 3.8298740012972963 | Elapsed time: 45.82620406150818 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_407_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 408 | Training loss: 3.813330187517054 | Elapsed time: 45.7995502948761 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_408_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 409 | Training loss: 3.792220676646513 | Elapsed time: 45.75705933570862 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_409_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 410 | Training loss: 3.8013525710386387 | Elapsed time: 45.812289237976074 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_410_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 411 | Training loss: 3.8094055559120927 | Elapsed time: 45.72086715698242 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_411_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 412 | Training loss: 3.7876689995036408 | Elapsed time: 45.74204921722412 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_412_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 413 | Training loss: 3.8040059454300823 | Elapsed time: 45.69155812263489 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_413_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 414 | Training loss: 3.8034432495341584 | Elapsed time: 45.819308042526245 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_414_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 415 | Training loss: 3.782887108185712 | Elapsed time: 45.81539750099182 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_415_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 416 | Training loss: 3.767841053944008 | Elapsed time: 45.7323112487793 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_416_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 417 | Training loss: 3.7957209278555477 | Elapsed time: 45.89246153831482 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_417_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 418 | Training loss: 3.7892100203271006 | Elapsed time: 45.75222826004028 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_418_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 419 | Training loss: 3.748373419630761 | Elapsed time: 45.92216205596924 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_419_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 420 | Training loss: 3.7854665354186414 | Elapsed time: 45.83523392677307 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_420_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 421 | Training loss: 3.7920419375101724 | Elapsed time: 45.84494090080261 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_421_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 422 | Training loss: 3.7795009987027037 | Elapsed time: 45.767077684402466 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_422_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 423 | Training loss: 3.750762434566722 | Elapsed time: 46.04787635803223 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_423_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 424 | Training loss: 3.766093366286334 | Elapsed time: 45.805742025375366 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_424_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 425 | Training loss: 3.8054140034843895 | Elapsed time: 45.92286729812622 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_425_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 426 | Training loss: 3.7644942227531883 | Elapsed time: 45.71893763542175 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_426_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 427 | Training loss: 3.768899917602539 | Elapsed time: 45.71061563491821 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_427_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 428 | Training loss: 3.7443536075891233 | Elapsed time: 45.79273986816406 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_428_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 429 | Training loss: 3.752335417504404 | Elapsed time: 45.905532360076904 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_429_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 430 | Training loss: 3.7704092287549784 | Elapsed time: 45.92292642593384 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_430_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 431 | Training loss: 3.7447444551131306 | Elapsed time: 45.90982937812805 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_431_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 432 | Training loss: 3.7528877398547005 | Elapsed time: 45.71091651916504 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_432_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 433 | Training loss: 3.7425592646879307 | Elapsed time: 45.780346155166626 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_433_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 434 | Training loss: 3.7589697276844696 | Elapsed time: 45.79366731643677 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_434_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 435 | Training loss: 3.767151355743408 | Elapsed time: 45.962151288986206 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_435_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 436 | Training loss: 3.7206368773591283 | Elapsed time: 45.88848924636841 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_436_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 437 | Training loss: 3.7589551748013963 | Elapsed time: 45.732905626297 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_437_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 438 | Training loss: 3.7365425334257236 | Elapsed time: 45.79592490196228 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_438_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 439 | Training loss: 3.744491787517772 | Elapsed time: 45.678417444229126 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_439_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 440 | Training loss: 3.775090025920494 | Elapsed time: 45.82292556762695 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_440_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 441 | Training loss: 3.7338199708975996 | Elapsed time: 45.68612790107727 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_441_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 442 | Training loss: 3.7420668695487227 | Elapsed time: 45.773292779922485 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_442_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 443 | Training loss: 3.772536048702165 | Elapsed time: 45.8284068107605 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_443_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 444 | Training loss: 3.7435428114498364 | Elapsed time: 45.75700330734253 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_444_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 445 | Training loss: 3.722346081453211 | Elapsed time: 45.759955167770386 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_445_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 446 | Training loss: 3.7265693580403045 | Elapsed time: 45.76993775367737 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_446_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 447 | Training loss: 3.744290739882226 | Elapsed time: 45.82737064361572 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_447_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 448 | Training loss: 3.7217018230288637 | Elapsed time: 45.699047327041626 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_448_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 449 | Training loss: 3.689666897642846 | Elapsed time: 45.707205295562744 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_449_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 450 | Training loss: 3.7514563308042637 | Elapsed time: 45.71065163612366 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_450_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 451 | Training loss: 3.7041939847609577 | Elapsed time: 45.693275451660156 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_451_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 452 | Training loss: 3.7256192833769557 | Elapsed time: 45.778043270111084 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_452_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 453 | Training loss: 3.727032423019409 | Elapsed time: 45.728052854537964 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_453_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 454 | Training loss: 3.722350176642923 | Elapsed time: 45.72817277908325 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_454_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 455 | Training loss: 3.706623825372434 | Elapsed time: 45.84175086021423 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_455_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 456 | Training loss: 3.6808141025842405 | Elapsed time: 45.73637843132019 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_456_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 457 | Training loss: 3.7009049443637623 | Elapsed time: 45.791184425354004 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_457_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 458 | Training loss: 3.696110767476699 | Elapsed time: 45.68181252479553 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_458_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 459 | Training loss: 3.7075692345114315 | Elapsed time: 45.702738761901855 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_459_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 460 | Training loss: 3.7238470058815154 | Elapsed time: 45.78346800804138 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_460_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 461 | Training loss: 3.6902098468705717 | Elapsed time: 45.78328490257263 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_461_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 462 | Training loss: 3.711198339275285 | Elapsed time: 45.87877082824707 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_462_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 463 | Training loss: 3.692698235605277 | Elapsed time: 45.82216191291809 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_463_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 464 | Training loss: 3.6955341872046974 | Elapsed time: 45.81164050102234 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_464_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 465 | Training loss: 3.711622439178766 | Elapsed time: 45.77434492111206 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_465_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 466 | Training loss: 3.7141761359046486 | Elapsed time: 45.82018494606018 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_466_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 467 | Training loss: 3.712543567021688 | Elapsed time: 45.84186291694641 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_467_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 468 | Training loss: 3.677422640370388 | Elapsed time: 45.87438082695007 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_468_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 469 | Training loss: 3.6818100938610003 | Elapsed time: 45.80971431732178 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_469_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 470 | Training loss: 3.680539285435396 | Elapsed time: 46.05600929260254 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_470_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 471 | Training loss: 3.682006410523957 | Elapsed time: 45.76978850364685 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_471_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 472 | Training loss: 3.6530905611374798 | Elapsed time: 45.75235557556152 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_472_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 473 | Training loss: 3.7130888630362118 | Elapsed time: 45.89635133743286 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_473_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 474 | Training loss: 3.683614693435968 | Elapsed time: 45.7465341091156 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_474_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 475 | Training loss: 3.695204229915843 | Elapsed time: 45.86286020278931 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_475_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 476 | Training loss: 3.677377027623794 | Elapsed time: 45.839394092559814 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_476_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 477 | Training loss: 3.6797606991786584 | Elapsed time: 45.77860641479492 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_477_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 478 | Training loss: 3.6820947048710844 | Elapsed time: 45.6973717212677 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_478_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 479 | Training loss: 3.664548920650108 | Elapsed time: 45.75576663017273 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_479_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 480 | Training loss: 3.665831500408696 | Elapsed time: 45.76611924171448 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_480_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 481 | Training loss: 3.675371338339413 | Elapsed time: 45.83564901351929 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_481_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 482 | Training loss: 3.664026760587505 | Elapsed time: 45.819575786590576 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_482_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 483 | Training loss: 3.666058287901037 | Elapsed time: 45.800058364868164 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_483_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 484 | Training loss: 3.6691698513778985 | Elapsed time: 45.76993680000305 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_484_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 485 | Training loss: 3.6817943956337724 | Elapsed time: 45.87932014465332 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_485_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 486 | Training loss: 3.6831733993455473 | Elapsed time: 45.82701110839844 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_486_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 487 | Training loss: 3.677093122519699 | Elapsed time: 45.72465801239014 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_487_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 488 | Training loss: 3.640575839023964 | Elapsed time: 45.730587005615234 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_488_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 489 | Training loss: 3.654621774075078 | Elapsed time: 45.87994384765625 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_489_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 490 | Training loss: 3.627704017302569 | Elapsed time: 45.725367307662964 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_490_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 491 | Training loss: 3.6529789952670826 | Elapsed time: 45.75209403038025 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_491_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 492 | Training loss: 3.6203210260353837 | Elapsed time: 45.73609447479248 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_492_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 493 | Training loss: 3.636124938142066 | Elapsed time: 45.832459449768066 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_493_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 494 | Training loss: 3.646155829523124 | Elapsed time: 45.68715190887451 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_494_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 495 | Training loss: 3.6484201749165854 | Elapsed time: 45.73943257331848 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_495_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 496 | Training loss: 3.663206792345234 | Elapsed time: 45.84268045425415 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_496_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 497 | Training loss: 3.630334634406894 | Elapsed time: 45.75830435752869 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_497_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 498 | Training loss: 3.6201900781369676 | Elapsed time: 45.71403884887695 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_498_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 499 | Training loss: 3.6555950828627046 | Elapsed time: 45.81041073799133 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_499_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 500 | Training loss: 3.619360143063115 | Elapsed time: 45.72457408905029 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_500_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 501 | Training loss: 3.6351062456766763 | Elapsed time: 45.743778467178345 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_501_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 502 | Training loss: 3.6364689340778424 | Elapsed time: 45.868988275527954 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_502_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 503 | Training loss: 3.6366872740726843 | Elapsed time: 45.80035948753357 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_503_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 504 | Training loss: 3.638686741099638 | Elapsed time: 45.80087494850159 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_504_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 505 | Training loss: 3.6192226363163367 | Elapsed time: 45.7088348865509 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_505_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 506 | Training loss: 3.6319725232965805 | Elapsed time: 45.7879056930542 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_506_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 507 | Training loss: 3.615298575046016 | Elapsed time: 45.76285243034363 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_507_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 508 | Training loss: 3.6161576953588748 | Elapsed time: 45.76627779006958 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_508_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 509 | Training loss: 3.6293316588682285 | Elapsed time: 45.758143186569214 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_509_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 510 | Training loss: 3.6241812799491133 | Elapsed time: 45.80730438232422 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_510_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 511 | Training loss: 3.6152775287628174 | Elapsed time: 45.71262311935425 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_511_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 512 | Training loss: 3.6106857832740333 | Elapsed time: 45.830846309661865 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_512_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 513 | Training loss: 3.621421037935743 | Elapsed time: 45.74433994293213 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_513_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 514 | Training loss: 3.620034091612872 | Elapsed time: 45.79603815078735 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_514_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 515 | Training loss: 3.6424954685510373 | Elapsed time: 45.86054491996765 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_515_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 516 | Training loss: 3.6048827264823164 | Elapsed time: 45.79660725593567 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_516_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 517 | Training loss: 3.620084009918512 | Elapsed time: 45.74446249008179 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_517_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 518 | Training loss: 3.606914590386783 | Elapsed time: 45.761038064956665 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_518_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 519 | Training loss: 3.58143211813534 | Elapsed time: 45.811211824417114 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_519_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 520 | Training loss: 3.6074790580599916 | Elapsed time: 45.85003137588501 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_520_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 521 | Training loss: 3.5960206798478667 | Elapsed time: 45.799206018447876 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_521_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 522 | Training loss: 3.578035873525283 | Elapsed time: 45.7489058971405 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_522_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 523 | Training loss: 3.587947639764524 | Elapsed time: 45.775434732437134 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_523_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 524 | Training loss: 3.5918150696099973 | Elapsed time: 45.73317074775696 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_524_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 525 | Training loss: 3.582823599086088 | Elapsed time: 45.78147315979004 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_525_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 526 | Training loss: 3.610096492019354 | Elapsed time: 45.916542768478394 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_526_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 527 | Training loss: 3.5824657514983533 | Elapsed time: 45.81623029708862 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_527_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 528 | Training loss: 3.60139248417873 | Elapsed time: 45.714343309402466 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_528_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 529 | Training loss: 3.5673714198318183 | Elapsed time: 45.65109872817993 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_529_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 530 | Training loss: 3.603390614191691 | Elapsed time: 45.87249231338501 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_530_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 531 | Training loss: 3.5637780731799555 | Elapsed time: 45.8129026889801 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_531_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 532 | Training loss: 3.559175739101335 | Elapsed time: 45.91944146156311 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_532_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 533 | Training loss: 3.588287727505553 | Elapsed time: 45.87603998184204 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_533_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 534 | Training loss: 3.593456455305511 | Elapsed time: 45.75836133956909 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_534_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 535 | Training loss: 3.577378502079085 | Elapsed time: 45.876540184020996 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_535_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 536 | Training loss: 3.547340926002054 | Elapsed time: 45.8982937335968 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_536_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 537 | Training loss: 3.578274876463647 | Elapsed time: 45.70437669754028 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_537_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 538 | Training loss: 3.5836132133708283 | Elapsed time: 45.8391797542572 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_538_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 539 | Training loss: 3.576363526138605 | Elapsed time: 45.86131501197815 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_539_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 540 | Training loss: 3.55753763516744 | Elapsed time: 45.782726526260376 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_540_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 541 | Training loss: 3.590172421698477 | Elapsed time: 45.786593198776245 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_541_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 542 | Training loss: 3.566668430964152 | Elapsed time: 45.780797481536865 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_542_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 543 | Training loss: 3.57463150398404 | Elapsed time: 45.72766613960266 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_543_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 544 | Training loss: 3.5538858759636973 | Elapsed time: 45.85027861595154 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_544_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 545 | Training loss: 3.572381804971134 | Elapsed time: 45.807167768478394 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_545_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 546 | Training loss: 3.561085537368176 | Elapsed time: 45.73611903190613 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_546_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 547 | Training loss: 3.5630816010867847 | Elapsed time: 45.72577738761902 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_547_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 548 | Training loss: 3.5855934666652307 | Elapsed time: 45.87589502334595 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_548_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 549 | Training loss: 3.591406410815669 | Elapsed time: 45.889238119125366 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_549_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 550 | Training loss: 3.572818690655278 | Elapsed time: 45.78890109062195 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_550_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 551 | Training loss: 3.568417165793625 | Elapsed time: 45.747254848480225 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_551_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 552 | Training loss: 3.562666794833015 | Elapsed time: 45.75851917266846 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_552_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 553 | Training loss: 3.549495664297366 | Elapsed time: 45.77099347114563 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_553_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 554 | Training loss: 3.569104330212462 | Elapsed time: 45.732834815979004 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_554_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 555 | Training loss: 3.544728194966036 | Elapsed time: 45.73093867301941 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_555_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 556 | Training loss: 3.5710075743058147 | Elapsed time: 45.7553915977478 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_556_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 557 | Training loss: 3.581113707785513 | Elapsed time: 45.69562768936157 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_557_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 558 | Training loss: 3.5615744029774383 | Elapsed time: 45.755027294158936 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_558_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 559 | Training loss: 3.574236425698972 | Elapsed time: 45.77834725379944 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_559_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 560 | Training loss: 3.521183719822005 | Elapsed time: 45.86951971054077 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_560_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 561 | Training loss: 3.545920180339439 | Elapsed time: 45.82133150100708 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_561_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 562 | Training loss: 3.5438902378082275 | Elapsed time: 45.698291301727295 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_562_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 563 | Training loss: 3.549007780411664 | Elapsed time: 45.85015654563904 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_563_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 564 | Training loss: 3.527584814557842 | Elapsed time: 45.81065225601196 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_564_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 565 | Training loss: 3.5518528386658312 | Elapsed time: 45.72011852264404 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_565_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 566 | Training loss: 3.5222255575890635 | Elapsed time: 45.795082330703735 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_566_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 567 | Training loss: 3.536365597855811 | Elapsed time: 45.77018880844116 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_567_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 568 | Training loss: 3.5467810817793306 | Elapsed time: 45.89712190628052 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_568_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 569 | Training loss: 3.5153792932921766 | Elapsed time: 45.77942943572998 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_569_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 570 | Training loss: 3.547316509134629 | Elapsed time: 45.76272797584534 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_570_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 571 | Training loss: 3.5322247112498566 | Elapsed time: 45.759705781936646 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_571_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 572 | Training loss: 3.5076413294848274 | Elapsed time: 45.7932231426239 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_572_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 573 | Training loss: 3.5378591733820297 | Elapsed time: 45.79315710067749 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_573_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 574 | Training loss: 3.537105723923328 | Elapsed time: 45.845287799835205 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_574_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 575 | Training loss: 3.530006081450219 | Elapsed time: 45.910768032073975 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_575_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 576 | Training loss: 3.5380446630365707 | Elapsed time: 45.748570680618286 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_576_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 577 | Training loss: 3.5262426067801083 | Elapsed time: 45.71668887138367 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_577_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 578 | Training loss: 3.5175578173469093 | Elapsed time: 45.787497997283936 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_578_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 579 | Training loss: 3.543039737963209 | Elapsed time: 45.751216888427734 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_579_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 580 | Training loss: 3.5464063765955904 | Elapsed time: 45.77888584136963 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_580_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 581 | Training loss: 3.5461328403622496 | Elapsed time: 45.8108971118927 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_581_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 582 | Training loss: 3.5045634484758565 | Elapsed time: 45.75655937194824 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_582_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 583 | Training loss: 3.4783373066023286 | Elapsed time: 45.79543876647949 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_583_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 584 | Training loss: 3.5270931954477347 | Elapsed time: 45.89269280433655 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_584_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 585 | Training loss: 3.483154334273993 | Elapsed time: 45.79225182533264 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_585_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 586 | Training loss: 3.480585159039965 | Elapsed time: 45.766990184783936 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_586_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 587 | Training loss: 3.4840409054475674 | Elapsed time: 45.754605531692505 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_587_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 588 | Training loss: 3.5189931392669678 | Elapsed time: 45.91545653343201 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_588_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 589 | Training loss: 3.4901255065319585 | Elapsed time: 45.71482253074646 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_589_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 590 | Training loss: 3.484754029442282 | Elapsed time: 45.92859482765198 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_590_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 591 | Training loss: 3.522638905282114 | Elapsed time: 45.73933529853821 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_591_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 592 | Training loss: 3.4944469414505304 | Elapsed time: 45.839985847473145 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_592_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 593 | Training loss: 3.521314401252597 | Elapsed time: 45.769554138183594 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_593_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 594 | Training loss: 3.507628688625261 | Elapsed time: 45.771488428115845 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_594_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 595 | Training loss: 3.4837665137122658 | Elapsed time: 45.793320417404175 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_595_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 596 | Training loss: 3.501101568633435 | Elapsed time: 45.78919839859009 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_596_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 597 | Training loss: 3.499072841569489 | Elapsed time: 45.75569224357605 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_597_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 598 | Training loss: 3.5184629103716683 | Elapsed time: 45.80490517616272 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_598_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 599 | Training loss: 3.5156732400258384 | Elapsed time: 45.91169261932373 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_599_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 600 | Training loss: 3.50284304338343 | Elapsed time: 45.78575778007507 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_600_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 601 | Training loss: 3.4755333358166265 | Elapsed time: 45.84081244468689 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_601_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 602 | Training loss: 3.4743070228427064 | Elapsed time: 45.85515570640564 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_602_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 603 | Training loss: 3.497099703433467 | Elapsed time: 45.8853554725647 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_603_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 604 | Training loss: 3.4852357004203047 | Elapsed time: 45.804861545562744 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_604_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 605 | Training loss: 3.5046979025298475 | Elapsed time: 45.80718207359314 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_605_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 606 | Training loss: 3.4858686035754634 | Elapsed time: 45.78587365150452 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_606_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 607 | Training loss: 3.4780100887896968 | Elapsed time: 45.78765678405762 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_607_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 608 | Training loss: 3.489569995917526 | Elapsed time: 45.75650405883789 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_608_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 609 | Training loss: 3.476328550600538 | Elapsed time: 45.725624561309814 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_609_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 610 | Training loss: 3.475485521204331 | Elapsed time: 45.84169864654541 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_610_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 611 | Training loss: 3.447518699309405 | Elapsed time: 45.70341920852661 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_611_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 612 | Training loss: 3.4768132088231107 | Elapsed time: 45.75921678543091 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_612_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 613 | Training loss: 3.4564170323166192 | Elapsed time: 45.75544857978821 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_613_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 614 | Training loss: 3.4656789302825928 | Elapsed time: 45.74786162376404 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_614_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 615 | Training loss: 3.472490016151877 | Elapsed time: 45.78177809715271 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_615_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 616 | Training loss: 3.454250503988827 | Elapsed time: 45.731683015823364 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_616_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 617 | Training loss: 3.461532195409139 | Elapsed time: 45.783872842788696 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_617_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 618 | Training loss: 3.4650278699164296 | Elapsed time: 45.75779104232788 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_618_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 619 | Training loss: 3.4736216302011527 | Elapsed time: 45.74503779411316 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_619_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 620 | Training loss: 3.490294045093013 | Elapsed time: 45.79294419288635 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_620_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 621 | Training loss: 3.4441431363423667 | Elapsed time: 45.75602960586548 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_621_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 622 | Training loss: 3.4695125374139524 | Elapsed time: 45.8571994304657 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_622_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 623 | Training loss: 3.4796549385669184 | Elapsed time: 45.90036392211914 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_623_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 624 | Training loss: 3.473395071777643 | Elapsed time: 45.79806852340698 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_624_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 625 | Training loss: 3.4466037423002955 | Elapsed time: 45.77465867996216 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_625_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 626 | Training loss: 3.4454045342464075 | Elapsed time: 45.75342321395874 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_626_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 627 | Training loss: 3.4701486101337506 | Elapsed time: 45.940277338027954 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_627_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 628 | Training loss: 3.460931843402339 | Elapsed time: 45.81644940376282 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_628_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 629 | Training loss: 3.4589916911779666 | Elapsed time: 45.7768759727478 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_629_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 630 | Training loss: 3.445219829970715 | Elapsed time: 45.786335468292236 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_630_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 631 | Training loss: 3.433040824590945 | Elapsed time: 45.846187353134155 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_631_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 632 | Training loss: 3.468237989089068 | Elapsed time: 46.03614616394043 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_632_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 633 | Training loss: 3.4576060584947172 | Elapsed time: 45.79711699485779 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_633_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 634 | Training loss: 3.4560338001625213 | Elapsed time: 45.727110147476196 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_634_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 635 | Training loss: 3.4479176904640947 | Elapsed time: 45.835121631622314 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_635_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 636 | Training loss: 3.479832279915903 | Elapsed time: 45.742878913879395 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_636_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 637 | Training loss: 3.4434015610638786 | Elapsed time: 45.884357213974 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_637_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 638 | Training loss: 3.4343419916489544 | Elapsed time: 45.89390254020691 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_638_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 639 | Training loss: 3.431419718499277 | Elapsed time: 45.841896533966064 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_639_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 640 | Training loss: 3.436525171878291 | Elapsed time: 45.76418662071228 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_640_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 641 | Training loss: 3.4375067037694595 | Elapsed time: 45.82820653915405 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_641_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 642 | Training loss: 3.451072935964547 | Elapsed time: 45.78660297393799 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_642_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 643 | Training loss: 3.427916311750225 | Elapsed time: 45.79098582267761 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_643_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 644 | Training loss: 3.426375244178024 | Elapsed time: 45.85457754135132 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_644_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 645 | Training loss: 3.4513141173942414 | Elapsed time: 45.88391900062561 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_645_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 646 | Training loss: 3.448798221700332 | Elapsed time: 45.83819842338562 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_646_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 647 | Training loss: 3.462198631436217 | Elapsed time: 45.78380990028381 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_647_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 648 | Training loss: 3.4499526210859712 | Elapsed time: 45.84277033805847 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_648_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 649 | Training loss: 3.427376069274603 | Elapsed time: 45.840179204940796 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_649_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 650 | Training loss: 3.4344135031980625 | Elapsed time: 45.825522661209106 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_650_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 651 | Training loss: 3.4216979064193427 | Elapsed time: 45.810025453567505 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_651_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 652 | Training loss: 3.439679192561729 | Elapsed time: 45.75443243980408 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_652_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 653 | Training loss: 3.4339582546084535 | Elapsed time: 45.84472036361694 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_653_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 654 | Training loss: 3.420341444950478 | Elapsed time: 45.91008377075195 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_654_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 655 | Training loss: 3.426262004702699 | Elapsed time: 45.89606308937073 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_655_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 656 | Training loss: 3.4186644600886926 | Elapsed time: 45.70050024986267 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_656_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 657 | Training loss: 3.4399611482433246 | Elapsed time: 45.74589776992798 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_657_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 658 | Training loss: 3.4216178071265126 | Elapsed time: 45.78382611274719 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_658_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 659 | Training loss: 3.402527832517437 | Elapsed time: 45.964269161224365 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_659_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 660 | Training loss: 3.448038942673627 | Elapsed time: 45.87224769592285 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_660_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 661 | Training loss: 3.4267391410528445 | Elapsed time: 45.71793866157532 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_661_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 662 | Training loss: 3.4133181338216745 | Elapsed time: 45.738290786743164 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_662_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 663 | Training loss: 3.413786018595976 | Elapsed time: 45.80806922912598 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_663_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 664 | Training loss: 3.4070928938248577 | Elapsed time: 45.86476945877075 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_664_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 665 | Training loss: 3.4092050860909855 | Elapsed time: 45.70464468002319 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_665_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 666 | Training loss: 3.4069464159946814 | Elapsed time: 45.746559143066406 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_666_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 667 | Training loss: 3.401941883797739 | Elapsed time: 45.813716411590576 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_667_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 668 | Training loss: 3.4039810171314313 | Elapsed time: 45.77735733985901 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_668_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 669 | Training loss: 3.4129997281467213 | Elapsed time: 45.712830543518066 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_669_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 670 | Training loss: 3.4094667855431053 | Elapsed time: 45.72591757774353 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_670_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 671 | Training loss: 3.4134438739103428 | Elapsed time: 45.74758243560791 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_671_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 672 | Training loss: 3.4011783553104773 | Elapsed time: 45.74297261238098 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_672_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 673 | Training loss: 3.4384715884339574 | Elapsed time: 45.733285903930664 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_673_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 674 | Training loss: 3.4027365375967586 | Elapsed time: 45.804418325424194 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_674_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 675 | Training loss: 3.397232644698199 | Elapsed time: 45.81641507148743 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_675_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 676 | Training loss: 3.397431181926353 | Elapsed time: 45.91093444824219 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_676_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 677 | Training loss: 3.4115364364549223 | Elapsed time: 45.779741287231445 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_677_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 678 | Training loss: 3.4043630338182638 | Elapsed time: 45.81273341178894 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_678_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 679 | Training loss: 3.388638314078836 | Elapsed time: 45.835040807724 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_679_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 680 | Training loss: 3.3909787103241564 | Elapsed time: 45.74800944328308 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_680_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 681 | Training loss: 3.4117853594761267 | Elapsed time: 45.72181463241577 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_681_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 682 | Training loss: 3.418470237769333 | Elapsed time: 45.777135133743286 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_682_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 683 | Training loss: 3.390886292738073 | Elapsed time: 45.747565507888794 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_683_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 684 | Training loss: 3.396926547966751 | Elapsed time: 45.78527593612671 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_684_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 685 | Training loss: 3.409362059013516 | Elapsed time: 45.75014686584473 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_685_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 686 | Training loss: 3.3746892003452076 | Elapsed time: 45.748932123184204 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_686_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 687 | Training loss: 3.363074363446703 | Elapsed time: 45.83728766441345 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_687_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 688 | Training loss: 3.3956575346928015 | Elapsed time: 45.749544858932495 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_688_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 689 | Training loss: 3.4019162047143077 | Elapsed time: 45.80002403259277 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_689_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 690 | Training loss: 3.412513541240318 | Elapsed time: 45.82964611053467 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_690_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 691 | Training loss: 3.3925332415337657 | Elapsed time: 45.682889461517334 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_691_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 692 | Training loss: 3.384935411752439 | Elapsed time: 45.784111976623535 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_692_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 693 | Training loss: 3.3959051580990063 | Elapsed time: 45.929304361343384 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_693_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 694 | Training loss: 3.3860376348682477 | Elapsed time: 45.83712863922119 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_694_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 695 | Training loss: 3.368409694409838 | Elapsed time: 45.77704858779907 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_695_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 696 | Training loss: 3.380637701819925 | Elapsed time: 45.69623279571533 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_696_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 697 | Training loss: 3.3936251472024357 | Elapsed time: 45.93108248710632 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_697_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 698 | Training loss: 3.380735158920288 | Elapsed time: 45.82321834564209 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_698_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 699 | Training loss: 3.387145678202311 | Elapsed time: 45.844234228134155 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_699_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 700 | Training loss: 3.369647783391616 | Elapsed time: 45.71201300621033 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_700_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 701 | Training loss: 3.387667029511695 | Elapsed time: 45.77713632583618 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_701_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 702 | Training loss: 3.393901450961244 | Elapsed time: 45.77651119232178 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_702_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 703 | Training loss: 3.3773706847546148 | Elapsed time: 45.75605797767639 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_703_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 704 | Training loss: 3.3696053261850394 | Elapsed time: 45.90200066566467 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_704_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 705 | Training loss: 3.37614873343823 | Elapsed time: 45.73957371711731 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_705_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 706 | Training loss: 3.3733947791305243 | Elapsed time: 45.727792263031006 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_706_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 707 | Training loss: 3.390608034881891 | Elapsed time: 45.73291516304016 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_707_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 708 | Training loss: 3.3532675013822666 | Elapsed time: 45.75384712219238 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_708_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 709 | Training loss: 3.3549047264398313 | Elapsed time: 45.74395799636841 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_709_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 710 | Training loss: 3.3628121450835584 | Elapsed time: 45.84468364715576 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_710_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 711 | Training loss: 3.368505697624356 | Elapsed time: 45.76213312149048 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_711_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 712 | Training loss: 3.3683313014460543 | Elapsed time: 45.750454902648926 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_712_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 713 | Training loss: 3.354392472435446 | Elapsed time: 45.86088275909424 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_713_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 714 | Training loss: 3.376609993915932 | Elapsed time: 45.8176646232605 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_714_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 715 | Training loss: 3.359300454457601 | Elapsed time: 45.83746337890625 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_715_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 716 | Training loss: 3.3614618637982536 | Elapsed time: 45.76125407218933 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_716_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 717 | Training loss: 3.3694473668640734 | Elapsed time: 45.75048780441284 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_717_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 718 | Training loss: 3.3624661763509116 | Elapsed time: 45.83478045463562 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_718_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 719 | Training loss: 3.3662441524804807 | Elapsed time: 45.723921060562134 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_719_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 720 | Training loss: 3.3388667433869603 | Elapsed time: 45.959994077682495 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_720_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 721 | Training loss: 3.356076708026961 | Elapsed time: 45.72275733947754 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_721_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 722 | Training loss: 3.350555181503296 | Elapsed time: 45.834737062454224 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_722_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 723 | Training loss: 3.362653250787772 | Elapsed time: 45.68653678894043 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_723_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 724 | Training loss: 3.368948604546341 | Elapsed time: 45.793015241622925 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_724_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 725 | Training loss: 3.372517281887578 | Elapsed time: 45.80940103530884 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_725_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 726 | Training loss: 3.378237532634361 | Elapsed time: 45.814557790756226 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_726_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 727 | Training loss: 3.3638266254873836 | Elapsed time: 45.901580572128296 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_727_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 728 | Training loss: 3.365880774516685 | Elapsed time: 45.73447799682617 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_728_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 729 | Training loss: 3.329318462633619 | Elapsed time: 45.85200500488281 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_729_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 730 | Training loss: 3.3518226053200517 | Elapsed time: 45.79005718231201 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_730_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 731 | Training loss: 3.3455851732515822 | Elapsed time: 45.81258964538574 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_731_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 732 | Training loss: 3.3624265661426618 | Elapsed time: 45.88011837005615 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_732_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 733 | Training loss: 3.349372069040934 | Elapsed time: 45.79597854614258 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_733_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 734 | Training loss: 3.360925880132937 | Elapsed time: 45.74649000167847 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_734_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 735 | Training loss: 3.350231086506563 | Elapsed time: 45.72745609283447 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_735_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 736 | Training loss: 3.328285591275084 | Elapsed time: 45.79785227775574 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_736_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 737 | Training loss: 3.3390114354152307 | Elapsed time: 45.742233753204346 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_737_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 738 | Training loss: 3.335846928989186 | Elapsed time: 45.748496532440186 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_738_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 739 | Training loss: 3.3267808007258997 | Elapsed time: 45.766828775405884 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_739_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 740 | Training loss: 3.331011365441715 | Elapsed time: 45.82563090324402 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_740_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 741 | Training loss: 3.3554541036194445 | Elapsed time: 45.9176709651947 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_741_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 742 | Training loss: 3.3349211870455275 | Elapsed time: 45.79129242897034 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_742_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 743 | Training loss: 3.348447416342941 | Elapsed time: 45.81622505187988 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_743_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 744 | Training loss: 3.3349306770399507 | Elapsed time: 45.77122926712036 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_744_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 745 | Training loss: 3.321698371101828 | Elapsed time: 45.78172302246094 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_745_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 746 | Training loss: 3.3290297190348306 | Elapsed time: 45.785921573638916 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_746_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 747 | Training loss: 3.3414585964352477 | Elapsed time: 45.848167181015015 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_747_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 748 | Training loss: 3.3168441828559425 | Elapsed time: 45.719152212142944 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_748_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 749 | Training loss: 3.3463580655116663 | Elapsed time: 45.95047640800476 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_749_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 750 | Training loss: 3.3362362618539847 | Elapsed time: 45.73959398269653 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_750_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 751 | Training loss: 3.3369619332107843 | Elapsed time: 45.78945875167847 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_751_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 752 | Training loss: 3.3404207556855443 | Elapsed time: 45.87595558166504 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_752_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 753 | Training loss: 3.35509095472448 | Elapsed time: 46.09421634674072 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_753_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 754 | Training loss: 3.3481145606321445 | Elapsed time: 45.69322609901428 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_754_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 755 | Training loss: 3.322430274065803 | Elapsed time: 45.778557538986206 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_755_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 756 | Training loss: 3.3245648917029884 | Elapsed time: 45.836188077926636 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_756_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 757 | Training loss: 3.320547903285307 | Elapsed time: 45.791160106658936 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_757_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 758 | Training loss: 3.3183081477296117 | Elapsed time: 45.81965136528015 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_758_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 759 | Training loss: 3.3269822410508696 | Elapsed time: 45.79757356643677 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_759_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 760 | Training loss: 3.3340928648032393 | Elapsed time: 45.74722647666931 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_760_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 761 | Training loss: 3.336704226101146 | Elapsed time: 45.70831823348999 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_761_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 762 | Training loss: 3.2979468364341584 | Elapsed time: 45.76383113861084 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_762_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 763 | Training loss: 3.3365168758467134 | Elapsed time: 45.710284948349 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_763_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 764 | Training loss: 3.3111818257500145 | Elapsed time: 45.8201322555542 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_764_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 765 | Training loss: 3.3015831405041265 | Elapsed time: 45.864667892456055 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_765_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 766 | Training loss: 3.3183281328163896 | Elapsed time: 45.73379468917847 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_766_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 767 | Training loss: 3.314885190888947 | Elapsed time: 45.85969877243042 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_767_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 768 | Training loss: 3.315400563034357 | Elapsed time: 45.93468976020813 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_768_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 769 | Training loss: 3.313843287673651 | Elapsed time: 45.81492614746094 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_769_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 770 | Training loss: 3.3117684803757013 | Elapsed time: 45.861151933670044 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_770_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 771 | Training loss: 3.309352486741309 | Elapsed time: 45.69027662277222 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_771_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 772 | Training loss: 3.3204152958065856 | Elapsed time: 45.69240212440491 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_772_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 773 | Training loss: 3.317271022235646 | Elapsed time: 45.80265212059021 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_773_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 774 | Training loss: 3.3012900118734323 | Elapsed time: 45.828885555267334 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_774_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 775 | Training loss: 3.329753324097278 | Elapsed time: 45.95775008201599 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_775_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 776 | Training loss: 3.2953991796456132 | Elapsed time: 45.88331747055054 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_776_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 777 | Training loss: 3.3123770835352877 | Elapsed time: 45.95898509025574 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_777_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 778 | Training loss: 3.299463907877604 | Elapsed time: 45.86047983169556 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_778_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 779 | Training loss: 3.3177857492484297 | Elapsed time: 45.74486565589905 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_779_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 780 | Training loss: 3.3179735997143913 | Elapsed time: 45.79038119316101 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_780_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 781 | Training loss: 3.3236827242608165 | Elapsed time: 45.7925910949707 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_781_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 782 | Training loss: 3.312559670093013 | Elapsed time: 45.8011577129364 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_782_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 783 | Training loss: 3.3151454878788367 | Elapsed time: 45.84766483306885 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_783_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 784 | Training loss: 3.305653969446818 | Elapsed time: 45.86239528656006 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_784_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 785 | Training loss: 3.304607307209688 | Elapsed time: 45.74160051345825 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_785_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 786 | Training loss: 3.2938452701942595 | Elapsed time: 45.870511293411255 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_786_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 787 | Training loss: 3.3107739757089054 | Elapsed time: 45.77707886695862 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_787_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 788 | Training loss: 3.2819140518412873 | Elapsed time: 45.75053930282593 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_788_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 789 | Training loss: 3.310374788209504 | Elapsed time: 45.82685375213623 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_789_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 790 | Training loss: 3.297648495318843 | Elapsed time: 45.746527433395386 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_790_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 791 | Training loss: 3.2784140857995725 | Elapsed time: 45.760072469711304 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_791_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 792 | Training loss: 3.2994145271824857 | Elapsed time: 45.74454998970032 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_792_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 793 | Training loss: 3.2993720886754057 | Elapsed time: 45.779062032699585 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_793_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 794 | Training loss: 3.2863571924321793 | Elapsed time: 45.887033462524414 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_794_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 795 | Training loss: 3.2812916157292387 | Elapsed time: 45.70432114601135 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_795_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 796 | Training loss: 3.294144485511032 | Elapsed time: 45.71732997894287 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_796_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 797 | Training loss: 3.298261184318393 | Elapsed time: 45.72027802467346 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_797_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 798 | Training loss: 3.293034492754469 | Elapsed time: 45.86810874938965 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_798_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 799 | Training loss: 3.281677442438462 | Elapsed time: 45.72739505767822 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_799_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 800 | Training loss: 3.2835147287331377 | Elapsed time: 45.77690601348877 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_800_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 801 | Training loss: 3.3069611577426685 | Elapsed time: 45.77650046348572 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_801_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 802 | Training loss: 3.294171179042143 | Elapsed time: 45.77850699424744 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_802_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 803 | Training loss: 3.274918088725969 | Elapsed time: 45.74750590324402 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_803_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 804 | Training loss: 3.3029133946287867 | Elapsed time: 45.779778242111206 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_804_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 805 | Training loss: 3.292972896613327 | Elapsed time: 45.74056625366211 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_805_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 806 | Training loss: 3.3096012882157866 | Elapsed time: 45.77444815635681 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_806_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 807 | Training loss: 3.2799935107137643 | Elapsed time: 45.7419548034668 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_807_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 808 | Training loss: 3.2981327468273687 | Elapsed time: 45.7389702796936 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_808_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 809 | Training loss: 3.2607025258681355 | Elapsed time: 45.74720907211304 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_809_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 810 | Training loss: 3.2754572372810515 | Elapsed time: 45.973819971084595 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_810_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 811 | Training loss: 3.299025572982489 | Elapsed time: 45.85221576690674 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_811_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 812 | Training loss: 3.2823664020089542 | Elapsed time: 45.78744101524353 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_812_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 813 | Training loss: 3.309272420172598 | Elapsed time: 45.85684275627136 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_813_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 814 | Training loss: 3.2829446044622683 | Elapsed time: 45.86495566368103 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_814_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 815 | Training loss: 3.2924884440852145 | Elapsed time: 46.05440282821655 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_815_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 816 | Training loss: 3.2798394969865385 | Elapsed time: 45.876389265060425 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_816_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 817 | Training loss: 3.2959973625108305 | Elapsed time: 45.75635290145874 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_817_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 818 | Training loss: 3.2634027424980614 | Elapsed time: 45.81657648086548 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_818_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 819 | Training loss: 3.293073397056729 | Elapsed time: 45.786614179611206 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_819_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 820 | Training loss: 3.2842959469439936 | Elapsed time: 45.78309369087219 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_820_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 821 | Training loss: 3.300823169596055 | Elapsed time: 45.776243925094604 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_821_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 822 | Training loss: 3.259668752258899 | Elapsed time: 45.699788331985474 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_822_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 823 | Training loss: 3.25858585507262 | Elapsed time: 45.73154354095459 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_823_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 824 | Training loss: 3.2753486399557077 | Elapsed time: 45.684102058410645 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_824_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 825 | Training loss: 3.2729226607902375 | Elapsed time: 45.81658053398132 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_825_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 826 | Training loss: 3.273906324423996 | Elapsed time: 45.774869441986084 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_826_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 827 | Training loss: 3.26339464561612 | Elapsed time: 45.744176149368286 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_827_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 828 | Training loss: 3.249436336405137 | Elapsed time: 45.69532823562622 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_828_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 829 | Training loss: 3.2725283351599 | Elapsed time: 45.875242710113525 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_829_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 830 | Training loss: 3.2588926109613157 | Elapsed time: 45.82628321647644 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_830_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 831 | Training loss: 3.2777293663398894 | Elapsed time: 45.854875326156616 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_831_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 832 | Training loss: 3.2713133961546657 | Elapsed time: 45.78198838233948 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_832_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 833 | Training loss: 3.2639542280458937 | Elapsed time: 45.79107069969177 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_833_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 834 | Training loss: 3.2383769306482053 | Elapsed time: 45.801634550094604 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_834_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 835 | Training loss: 3.245335826686784 | Elapsed time: 46.024526596069336 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_835_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 836 | Training loss: 3.2642014026641846 | Elapsed time: 45.838419675827026 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_836_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 837 | Training loss: 3.27343329261331 | Elapsed time: 45.715089082717896 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_837_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 838 | Training loss: 3.2580586741952335 | Elapsed time: 45.86319661140442 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_838_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 839 | Training loss: 3.248390730689554 | Elapsed time: 45.863078355789185 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_839_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 840 | Training loss: 3.255780000312656 | Elapsed time: 45.84237861633301 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_840_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 841 | Training loss: 3.269609904756733 | Elapsed time: 45.86471486091614 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_841_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 842 | Training loss: 3.2721140992407705 | Elapsed time: 45.78761863708496 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_842_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 843 | Training loss: 3.251365951463288 | Elapsed time: 45.82637286186218 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_843_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 844 | Training loss: 3.257873128442203 | Elapsed time: 45.778329849243164 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_844_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 845 | Training loss: 3.2467067007924997 | Elapsed time: 45.772852182388306 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_845_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 846 | Training loss: 3.2517814589481726 | Elapsed time: 45.78894567489624 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_846_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 847 | Training loss: 3.2339858167311726 | Elapsed time: 45.7571759223938 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_847_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 848 | Training loss: 3.247363805770874 | Elapsed time: 45.79052186012268 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_848_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 849 | Training loss: 3.236186429566028 | Elapsed time: 45.73117804527283 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_849_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 850 | Training loss: 3.251913168851067 | Elapsed time: 45.784990072250366 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_850_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 851 | Training loss: 3.2470066921383727 | Elapsed time: 45.74247074127197 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_851_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 852 | Training loss: 3.240520337048699 | Elapsed time: 45.807379484176636 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_852_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 853 | Training loss: 3.259242716957541 | Elapsed time: 45.81432271003723 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_853_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 854 | Training loss: 3.2144405374339984 | Elapsed time: 45.905190229415894 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_854_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 855 | Training loss: 3.243920494528378 | Elapsed time: 45.902403831481934 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_855_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 856 | Training loss: 3.2787583388534247 | Elapsed time: 45.77020573616028 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_856_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 857 | Training loss: 3.2497914538663975 | Elapsed time: 45.81914186477661 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_857_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 858 | Training loss: 3.256933380575741 | Elapsed time: 45.75200390815735 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_858_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 859 | Training loss: 3.2414246867684757 | Elapsed time: 45.8846709728241 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_859_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 860 | Training loss: 3.2496828378415574 | Elapsed time: 45.95163083076477 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_860_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 861 | Training loss: 3.238712787628174 | Elapsed time: 45.875510454177856 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_861_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 862 | Training loss: 3.259305051728791 | Elapsed time: 45.84184908866882 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_862_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 863 | Training loss: 3.2571983056909897 | Elapsed time: 45.82075309753418 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_863_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 864 | Training loss: 3.2425047696805467 | Elapsed time: 45.80563521385193 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_864_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 865 | Training loss: 3.240553626827165 | Elapsed time: 45.90229654312134 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_865_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 866 | Training loss: 3.230975539076562 | Elapsed time: 45.87152338027954 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_866_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 867 | Training loss: 3.243410694832895 | Elapsed time: 45.92978072166443 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_867_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 868 | Training loss: 3.2641771166932347 | Elapsed time: 45.87632489204407 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_868_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 869 | Training loss: 3.2434479068307316 | Elapsed time: 45.93513202667236 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_869_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 870 | Training loss: 3.2315039307463405 | Elapsed time: 45.80361342430115 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_870_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 871 | Training loss: 3.243482603746302 | Elapsed time: 46.02249622344971 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_871_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 872 | Training loss: 3.222190398795932 | Elapsed time: 45.86360168457031 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_872_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 873 | Training loss: 3.222977044535618 | Elapsed time: 45.9001784324646 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_873_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 874 | Training loss: 3.2394346770118263 | Elapsed time: 45.880104303359985 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_874_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 875 | Training loss: 3.234564566144756 | Elapsed time: 45.824058055877686 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_875_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 876 | Training loss: 3.2284633412080654 | Elapsed time: 45.7710075378418 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_876_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 877 | Training loss: 3.196126657373765 | Elapsed time: 45.76940417289734 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_877_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 878 | Training loss: 3.206315835316976 | Elapsed time: 45.73999786376953 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_878_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 879 | Training loss: 3.2553634222816017 | Elapsed time: 45.75161004066467 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_879_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 880 | Training loss: 3.2212914345311185 | Elapsed time: 45.72123718261719 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_880_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 881 | Training loss: 3.2195806456547156 | Elapsed time: 45.75735664367676 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_881_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 882 | Training loss: 3.2442444072050205 | Elapsed time: 45.798919677734375 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_882_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 883 | Training loss: 3.227922149733001 | Elapsed time: 45.9940927028656 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_883_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 884 | Training loss: 3.226678020813886 | Elapsed time: 45.799766063690186 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_884_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 885 | Training loss: 3.212430397669474 | Elapsed time: 45.777162075042725 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_885_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 886 | Training loss: 3.2344199816385903 | Elapsed time: 45.87092399597168 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_886_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 887 | Training loss: 3.225316127141317 | Elapsed time: 45.75570321083069 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_887_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 888 | Training loss: 3.210686075921152 | Elapsed time: 45.966856241226196 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_888_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 889 | Training loss: 3.2208757026522767 | Elapsed time: 45.769104957580566 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_889_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 890 | Training loss: 3.21197247505188 | Elapsed time: 45.7897310256958 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_890_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 891 | Training loss: 3.2153212556651996 | Elapsed time: 45.74602174758911 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_891_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 892 | Training loss: 3.2142492182114544 | Elapsed time: 45.765315771102905 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_892_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 893 | Training loss: 3.213445462432562 | Elapsed time: 45.97023010253906 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_893_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 894 | Training loss: 3.206687515857173 | Elapsed time: 45.84853720664978 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_894_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 895 | Training loss: 3.2230760022705676 | Elapsed time: 45.747395038604736 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_895_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 896 | Training loss: 3.2070801725574567 | Elapsed time: 45.73761510848999 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_896_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 897 | Training loss: 3.194332155526853 | Elapsed time: 45.94874930381775 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_897_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 898 | Training loss: 3.223978037927665 | Elapsed time: 45.937819719314575 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_898_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 899 | Training loss: 3.2097222851771936 | Elapsed time: 45.746865034103394 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_899_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 900 | Training loss: 3.2106725328108845 | Elapsed time: 45.96319103240967 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_900_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 901 | Training loss: 3.216743417814666 | Elapsed time: 45.749064922332764 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_901_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 902 | Training loss: 3.2184481527291093 | Elapsed time: 45.722793102264404 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_902_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 903 | Training loss: 3.1834608106052174 | Elapsed time: 45.854044675827026 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_903_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 904 | Training loss: 3.2009614448921355 | Elapsed time: 45.7710440158844 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_904_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 905 | Training loss: 3.2286741125817393 | Elapsed time: 45.749603271484375 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_905_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 906 | Training loss: 3.2076444859598197 | Elapsed time: 45.742478370666504 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_906_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 907 | Training loss: 3.202671957950966 | Elapsed time: 45.7882354259491 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_907_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 908 | Training loss: 3.197186764548807 | Elapsed time: 45.826945543289185 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_908_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 909 | Training loss: 3.202100342395259 | Elapsed time: 45.778706073760986 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_909_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 910 | Training loss: 3.210111884509816 | Elapsed time: 45.70129752159119 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_910_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 911 | Training loss: 3.205800556669048 | Elapsed time: 45.7495813369751 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_911_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 912 | Training loss: 3.1957302233752083 | Elapsed time: 45.72144055366516 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_912_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 913 | Training loss: 3.2297529379526773 | Elapsed time: 45.82232332229614 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_913_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 914 | Training loss: 3.1869749929390703 | Elapsed time: 45.71951627731323 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_914_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 915 | Training loss: 3.201433761447084 | Elapsed time: 45.829710483551025 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_915_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 916 | Training loss: 3.203998000014062 | Elapsed time: 45.80248475074768 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_916_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 917 | Training loss: 3.197692534502815 | Elapsed time: 45.75815725326538 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_917_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 918 | Training loss: 3.211427796120737 | Elapsed time: 45.83730864524841 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_918_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 919 | Training loss: 3.2120739759183397 | Elapsed time: 45.7730827331543 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_919_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 920 | Training loss: 3.2009345550163117 | Elapsed time: 45.76390767097473 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_920_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 921 | Training loss: 3.185875238156786 | Elapsed time: 45.79355978965759 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_921_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 922 | Training loss: 3.2247561800713633 | Elapsed time: 45.852410554885864 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_922_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 923 | Training loss: 3.2146036578159705 | Elapsed time: 45.76791071891785 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_923_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 924 | Training loss: 3.202478955773746 | Elapsed time: 45.794888734817505 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_924_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 925 | Training loss: 3.1815923709495393 | Elapsed time: 45.73709034919739 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_925_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 926 | Training loss: 3.208765717113719 | Elapsed time: 45.95245814323425 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_926_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 927 | Training loss: 3.198117447834389 | Elapsed time: 45.71832537651062 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_927_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 928 | Training loss: 3.181800323374131 | Elapsed time: 45.8283793926239 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_928_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 929 | Training loss: 3.208513068217857 | Elapsed time: 45.77105736732483 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_929_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 930 | Training loss: 3.1910216527826645 | Elapsed time: 45.81977200508118 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_930_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 931 | Training loss: 3.1953987374025234 | Elapsed time: 45.78352379798889 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_931_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 932 | Training loss: 3.2102738548727596 | Elapsed time: 45.75445866584778 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_932_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 933 | Training loss: 3.1807073611839143 | Elapsed time: 45.765809059143066 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_933_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 934 | Training loss: 3.205295712340112 | Elapsed time: 45.793620347976685 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_934_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 935 | Training loss: 3.1869831038456335 | Elapsed time: 45.871684074401855 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_935_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 936 | Training loss: 3.177934249242147 | Elapsed time: 45.83258247375488 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_936_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 937 | Training loss: 3.191955337337419 | Elapsed time: 45.72021007537842 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_937_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 938 | Training loss: 3.1895584826375925 | Elapsed time: 45.93417954444885 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_938_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 939 | Training loss: 3.1893845399220786 | Elapsed time: 45.91195368766785 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_939_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 940 | Training loss: 3.1972895182815253 | Elapsed time: 45.95687651634216 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_940_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 941 | Training loss: 3.1926750669292376 | Elapsed time: 45.76249980926514 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_941_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 942 | Training loss: 3.1933641620710786 | Elapsed time: 45.91539430618286 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_942_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 943 | Training loss: 3.169050408344643 | Elapsed time: 45.901912212371826 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_943_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 944 | Training loss: 3.196798146939745 | Elapsed time: 45.842026233673096 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_944_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 945 | Training loss: 3.176518122355143 | Elapsed time: 45.75954532623291 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_945_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 946 | Training loss: 3.189837240705303 | Elapsed time: 45.983665466308594 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_946_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 947 | Training loss: 3.1813867372625015 | Elapsed time: 45.74690771102905 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_947_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 948 | Training loss: 3.183935193454518 | Elapsed time: 45.72437238693237 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_948_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 949 | Training loss: 3.1738395831164192 | Elapsed time: 45.8190495967865 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_949_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 950 | Training loss: 3.1997441731247247 | Elapsed time: 45.90559935569763 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_950_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 951 | Training loss: 3.167474489586026 | Elapsed time: 45.792677879333496 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_951_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 952 | Training loss: 3.167218699174769 | Elapsed time: 45.84005951881409 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_952_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 953 | Training loss: 3.1859580816007127 | Elapsed time: 45.71840000152588 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_953_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 954 | Training loss: 3.191533369176528 | Elapsed time: 45.902761936187744 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_954_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 955 | Training loss: 3.1489024209041223 | Elapsed time: 45.775776624679565 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_955_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 956 | Training loss: 3.1995854097254135 | Elapsed time: 45.87200713157654 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_956_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 957 | Training loss: 3.1904760575761983 | Elapsed time: 45.831228256225586 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_957_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 958 | Training loss: 3.1875580198624553 | Elapsed time: 45.751171350479126 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_958_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 959 | Training loss: 3.164497034222472 | Elapsed time: 45.9128041267395 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_959_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 960 | Training loss: 3.1527769659079756 | Elapsed time: 45.74024844169617 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_960_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 961 | Training loss: 3.166620086221134 | Elapsed time: 45.69774794578552 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_961_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 962 | Training loss: 3.194343370549819 | Elapsed time: 45.803977727890015 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_962_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 963 | Training loss: 3.1764922516018737 | Elapsed time: 45.94443607330322 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_963_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 964 | Training loss: 3.171038548151652 | Elapsed time: 45.919475078582764 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_964_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 965 | Training loss: 3.1922590265087054 | Elapsed time: 45.90211057662964 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_965_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 966 | Training loss: 3.1806196511960496 | Elapsed time: 45.86445164680481 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_966_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 967 | Training loss: 3.1730667563045727 | Elapsed time: 45.87425780296326 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_967_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 968 | Training loss: 3.145582923702165 | Elapsed time: 46.02084517478943 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_968_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 969 | Training loss: 3.173544434940114 | Elapsed time: 45.84981656074524 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_969_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 970 | Training loss: 3.17197333130182 | Elapsed time: 45.73045825958252 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_970_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 971 | Training loss: 3.1701860147364 | Elapsed time: 45.82064771652222 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_971_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 972 | Training loss: 3.1690396935332057 | Elapsed time: 45.92505979537964 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_972_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 973 | Training loss: 3.150895076639512 | Elapsed time: 45.71269345283508 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_973_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 974 | Training loss: 3.1630441488004197 | Elapsed time: 45.83788514137268 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_974_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 975 | Training loss: 3.1684789704341516 | Elapsed time: 45.90096378326416 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_975_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 976 | Training loss: 3.179400495454377 | Elapsed time: 45.7769980430603 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_976_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 977 | Training loss: 3.1793937963597916 | Elapsed time: 45.8331298828125 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_977_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 978 | Training loss: 3.1453133143630683 | Elapsed time: 45.90857481956482 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_978_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 979 | Training loss: 3.1940570999594295 | Elapsed time: 45.83131790161133 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_979_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 980 | Training loss: 3.1770531523461436 | Elapsed time: 45.82987880706787 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_980_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 981 | Training loss: 3.1500862953709623 | Elapsed time: 45.844868183135986 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_981_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 982 | Training loss: 3.1503615239087273 | Elapsed time: 45.81258416175842 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_982_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 983 | Training loss: 3.15973638085758 | Elapsed time: 45.77117729187012 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_983_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 984 | Training loss: 3.177945744757559 | Elapsed time: 45.80985999107361 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_984_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 985 | Training loss: 3.1558720925275017 | Elapsed time: 45.78403377532959 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_985_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 986 | Training loss: 3.1651826465831085 | Elapsed time: 45.8384804725647 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_986_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 987 | Training loss: 3.1560501351076016 | Elapsed time: 45.89458632469177 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_987_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 988 | Training loss: 3.1578329217200185 | Elapsed time: 45.77847194671631 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_988_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 989 | Training loss: 3.1324850437687894 | Elapsed time: 45.782209157943726 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_989_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 990 | Training loss: 3.1533754666646323 | Elapsed time: 45.80735373497009 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_990_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 991 | Training loss: 3.139298626020843 | Elapsed time: 45.75693106651306 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_991_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 992 | Training loss: 3.187654088525211 | Elapsed time: 45.754016637802124 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_992_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 993 | Training loss: 3.1679962429345823 | Elapsed time: 45.83856201171875 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_993_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 994 | Training loss: 3.1384397675009335 | Elapsed time: 45.74906897544861 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_994_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 995 | Training loss: 3.132409839069142 | Elapsed time: 45.81954216957092 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_995_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 996 | Training loss: 3.1621961406632964 | Elapsed time: 45.855876207351685 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_996_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 997 | Training loss: 3.1355190090104643 | Elapsed time: 46.04213070869446 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_997_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 998 | Training loss: 3.132141202103858 | Elapsed time: 45.885008811950684 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_998_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Epoch: 999 | Training loss: 3.1623815788942227 | Elapsed time: 45.8616738319397 +Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_999_imagenet_1_GPT_gimel_256b_0.0003lr_Adamo_0s.pt +Done