eminorhan committed on
Commit fea8e2a · 1 Parent(s): e0f111f

Upload 26 files

gpt_pretrained_models/imagenet_10_gimel.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d0fa98ba810f306959d80084cfb8c5236875ad9a46f11a906ce8fe4a82c09a9
+ size 9017855382
gpt_pretrained_models/imagenet_1_gimel.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d7d2385d0eb636ddc0fc9314dc86618ebcfea43b6e709a5e520dd46414d797a
+ size 9017854230
gpt_pretrained_models/imagenet_alef.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b89eb6606d08421379174c4dd1792ef30c2ccaa4e27df3bd2ad1d2dc4afadde9
+ size 1328322315
gpt_pretrained_models/imagenet_bet.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f4f5c34592ab8e711788163bf6196cb0db7f32f65ef01519bab64b86ba2c351d
+ size 4040141298
gpt_pretrained_models/imagenet_dalet.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a8aa04876f29834cfbb342a940e6064a44ae3026869398a8445b5a822e214c6
+ size 18354036978
gpt_pretrained_models/imagenet_gimel.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bdfa67dc7af131ab677330cd3cb54db212e258c54e2b6dcb9bcd19e5eb66f90d
+ size 9017855382
gpt_pretrained_models/saycam_gimel.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:487b993ede20da265d731cc3aefbebd701c1cc01d0d527d1fbc935c443729baf
+ size 9017855382
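Each of the seven .pt entries above is a Git LFS pointer: the checkpoint itself lives in LFS storage, and the file committed to the repo holds only the spec version, the SHA-256 object id, and the byte size (the gimel checkpoints are about 9 GB each). A minimal sketch of reading such a pointer; the parse_lfs_pointer helper is our own name, not part of this commit:

def parse_lfs_pointer(path):
    # A Git LFS pointer is a tiny key/value text file, one "key value" pair per line.
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

# Hypothetical usage on one of the files in this commit:
# parse_lfs_pointer("gpt_pretrained_models/saycam_gimel.pt")
# -> {'version': 'https://git-lfs.github.com/spec/v1',
#     'oid': 'sha256:487b993ede20da265d731cc3aefbebd701c1cc01d0d527d1fbc935c443729baf',
#     'size': '9017855382'}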
logs/imagenet_10_gimel_0.out ADDED
@@ -0,0 +1,823 @@
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, subsample=0.1, resume='', save_prefix='imagenet_10', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
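The log opens with the run's arguments. A sketch of an argparse parser consistent with the printed Namespace; the flag names are read off the printed fields, and the values shown are this run's settings, not necessarily the script's defaults:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--data_path', default='/scratch/work/public/imagenet/train')
parser.add_argument('--gpt_config', default='GPT_gimel')      # model size preset
parser.add_argument('--vocab_size', type=int, default=16384)  # VQGAN codebook size
parser.add_argument('--block_size', type=int, default=255)    # plausibly 16*16 image tokens minus one, for next-token prediction
parser.add_argument('--batch_size', type=int, default=32)     # per GPU
parser.add_argument('--lr', type=float, default=3e-4)
parser.add_argument('--optimizer', default='Adam')
parser.add_argument('--epochs', type=int, default=1000)
parser.add_argument('--subsample', type=float, default=0.1)   # fraction of ImageNet used ("imagenet_10")
parser.add_argument('--seed', type=int, default=0)
args = parser.parse_args()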
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+ 
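The YAML block above is the VQGAN config (imagenet_16x16_16384.yaml) echoed into the log: a 16384-entry codebook over a 16x16 latent grid at 256x256 input resolution. A minimal sketch of loading it the way taming-transformers-style code does, assuming the omegaconf package; the path is taken from the Namespace above:

from omegaconf import OmegaConf

cfg = OmegaConf.load("vqgan_pretrained_models/imagenet_16x16_16384.yaml")
print(cfg.model.target)                      # vqmodel.VQModel
print(cfg.model.params.n_embed)              # 16384 codebook entries
print(cfg.model.params.ddconfig.resolution)  # 256; ch_mult has 5 stages, so latents are 256/2^4 = 16x16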
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 128116 images, and takes 501 training iterations per epoch.
+ Number of parameters: 750659840
+ Running on 8 GPUs total
+ => no checkpoint loaded, will train from scratch
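The counts above are consistent with the arguments (our own arithmetic, not part of the log): subsample=0.1 keeps 128116 of ImageNet's ~1.28M training images, and with batch_size=32 on 8 GPUs the effective batch is 256, so one epoch is ceil(128116 / 256) = 501 iterations:

import math

images, per_gpu_batch, n_gpus = 128_116, 32, 8
effective_batch = per_gpu_batch * n_gpus             # 256 -- also the "256b" tag in the checkpoint names
iters_per_epoch = math.ceil(images / effective_batch)
print(effective_batch, iters_per_epoch)              # 256 501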
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 6.704032366861126 | Elapsed time: 450.7831690311432
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 6.495718568622947 | Elapsed time: 446.97644543647766
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 6.3533412948577945 | Elapsed time: 446.74239683151245
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 6.188957873932615 | Elapsed time: 446.72487688064575
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 6.0746586651145345 | Elapsed time: 446.52367997169495
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 6.005652911172893 | Elapsed time: 446.77147579193115
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.938131176307054 | Elapsed time: 446.90416169166565
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.909726595926189 | Elapsed time: 446.64437890052795
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.873030560697148 | Elapsed time: 446.67359232902527
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.851492369722226 | Elapsed time: 446.7589247226715
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.825499537462246 | Elapsed time: 446.75472497940063
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.814141161189584 | Elapsed time: 446.55490469932556
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.791773802743938 | Elapsed time: 446.43999218940735
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.781572082085524 | Elapsed time: 447.0822563171387
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.762389411469419 | Elapsed time: 446.623676776886
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.7504855020793375 | Elapsed time: 446.7000503540039
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.746330522015661 | Elapsed time: 446.82352566719055
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.731455185218247 | Elapsed time: 446.45564818382263
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.723884436898603 | Elapsed time: 446.5765073299408
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.711342798259682 | Elapsed time: 446.66976046562195
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.706820347113999 | Elapsed time: 446.5763199329376
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.692491517095509 | Elapsed time: 446.6747624874115
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.693201843611971 | Elapsed time: 446.698203086853
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.678540070851644 | Elapsed time: 446.8017203807831
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.666430976814377 | Elapsed time: 446.56217217445374
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.67353915121265 | Elapsed time: 446.6621129512787
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.65771843097405 | Elapsed time: 446.3762786388397
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.655462828462947 | Elapsed time: 446.5811059474945
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.639899507968011 | Elapsed time: 446.45018792152405
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.6308224148855 | Elapsed time: 446.58169436454773
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 30 | Training loss: 5.630201196004292 | Elapsed time: 446.4503273963928
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 31 | Training loss: 5.631200758045067 | Elapsed time: 449.28973841667175
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 32 | Training loss: 5.6194819376140295 | Elapsed time: 446.6562821865082
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 33 | Training loss: 5.615416231745494 | Elapsed time: 446.453635931015
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 34 | Training loss: 5.601784017985453 | Elapsed time: 446.58876395225525
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 35 | Training loss: 5.603739685165192 | Elapsed time: 446.6259922981262
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 36 | Training loss: 5.599722937433544 | Elapsed time: 446.4603908061981
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 37 | Training loss: 5.586018453815027 | Elapsed time: 446.5468535423279
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 38 | Training loss: 5.588587773298313 | Elapsed time: 446.5557916164398
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_038_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 39 | Training loss: 5.587331686191217 | Elapsed time: 446.5586664676666
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_039_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 40 | Training loss: 5.57057267034839 | Elapsed time: 446.5168857574463
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_040_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 41 | Training loss: 5.569836828760996 | Elapsed time: 446.65731739997864
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_041_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 42 | Training loss: 5.561255327479806 | Elapsed time: 446.44015407562256
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_042_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 43 | Training loss: 5.5647197921356994 | Elapsed time: 446.5304682254791
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_043_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 44 | Training loss: 5.556291212816676 | Elapsed time: 446.58975052833557
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_044_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 45 | Training loss: 5.551303362893963 | Elapsed time: 446.3050241470337
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_045_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 46 | Training loss: 5.548457943274827 | Elapsed time: 446.728271484375
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_046_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 47 | Training loss: 5.543257104184575 | Elapsed time: 446.5160164833069
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_047_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 48 | Training loss: 5.5345620052543225 | Elapsed time: 446.53871989250183
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_048_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 49 | Training loss: 5.534743250011208 | Elapsed time: 446.6604609489441
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_049_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 50 | Training loss: 5.528743631587533 | Elapsed time: 446.7539954185486
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_050_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 51 | Training loss: 5.526255160272716 | Elapsed time: 446.4551384449005
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_051_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 52 | Training loss: 5.5138973637731254 | Elapsed time: 446.49766206741333
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_052_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 53 | Training loss: 5.513059343882426 | Elapsed time: 446.47829008102417
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_053_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 54 | Training loss: 5.514456262607537 | Elapsed time: 446.45975637435913
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_054_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 55 | Training loss: 5.503334080625675 | Elapsed time: 446.5206174850464
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_055_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 56 | Training loss: 5.494777006541422 | Elapsed time: 446.50645208358765
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_056_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 57 | Training loss: 5.483406873044378 | Elapsed time: 446.35710763931274
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_057_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 58 | Training loss: 5.489371269286988 | Elapsed time: 446.6228256225586
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_058_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 59 | Training loss: 5.475472169483969 | Elapsed time: 446.4766607284546
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_059_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 60 | Training loss: 5.480038032798234 | Elapsed time: 447.0619640350342
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_060_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 61 | Training loss: 5.464671384312673 | Elapsed time: 446.4721043109894
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_061_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 62 | Training loss: 5.471685297236947 | Elapsed time: 446.3247981071472
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_062_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 63 | Training loss: 5.46516306433611 | Elapsed time: 446.6072835922241
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_063_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 64 | Training loss: 5.447195010270901 | Elapsed time: 446.4028387069702
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_064_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 65 | Training loss: 5.448395958441698 | Elapsed time: 446.673956155777
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_065_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 66 | Training loss: 5.442820265383539 | Elapsed time: 446.3735067844391
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_066_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 67 | Training loss: 5.445307013993253 | Elapsed time: 446.6103663444519
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_067_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 68 | Training loss: 5.436424980620425 | Elapsed time: 446.4665369987488
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_068_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 69 | Training loss: 5.421909844328067 | Elapsed time: 446.41515135765076
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_069_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 70 | Training loss: 5.421315743299777 | Elapsed time: 446.59061193466187
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_070_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 71 | Training loss: 5.416916813917027 | Elapsed time: 446.6038417816162
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_071_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 72 | Training loss: 5.417282813561415 | Elapsed time: 446.5357573032379
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_072_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 73 | Training loss: 5.409017088884365 | Elapsed time: 446.397873878479
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_073_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 74 | Training loss: 5.400188472694504 | Elapsed time: 446.7336404323578
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_074_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 75 | Training loss: 5.398452417103354 | Elapsed time: 446.5256471633911
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_075_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 76 | Training loss: 5.38626554911722 | Elapsed time: 446.46908736228943
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_076_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 77 | Training loss: 5.3815229505360005 | Elapsed time: 446.4845290184021
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_077_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 78 | Training loss: 5.380009483672426 | Elapsed time: 446.7757821083069
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_078_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 79 | Training loss: 5.367315599780358 | Elapsed time: 446.7671067714691
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_079_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 80 | Training loss: 5.370492581121936 | Elapsed time: 446.2723126411438
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_080_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 81 | Training loss: 5.365420976322806 | Elapsed time: 446.62510991096497
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_081_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 82 | Training loss: 5.361652893934421 | Elapsed time: 446.4501738548279
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_082_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 83 | Training loss: 5.346971226309588 | Elapsed time: 446.40029311180115
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_083_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 84 | Training loss: 5.354207563305091 | Elapsed time: 446.65940117836
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_084_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 85 | Training loss: 5.340756785607861 | Elapsed time: 446.4922659397125
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_085_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 86 | Training loss: 5.332425079421845 | Elapsed time: 446.5821075439453
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_086_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 87 | Training loss: 5.333958991273435 | Elapsed time: 446.6463327407837
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_087_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 88 | Training loss: 5.322744580799948 | Elapsed time: 446.6519305706024
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_088_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 89 | Training loss: 5.316232797390448 | Elapsed time: 446.2908763885498
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_089_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 90 | Training loss: 5.31501962562759 | Elapsed time: 446.69475865364075
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_090_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 91 | Training loss: 5.3033521903489165 | Elapsed time: 446.5577323436737
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_091_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 92 | Training loss: 5.30730946953901 | Elapsed time: 446.43302512168884
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_092_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 93 | Training loss: 5.30233544123149 | Elapsed time: 446.66130208969116
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_093_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 94 | Training loss: 5.292866284261921 | Elapsed time: 446.5603537559509
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_094_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 95 | Training loss: 5.298837316250372 | Elapsed time: 446.81792974472046
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_095_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 96 | Training loss: 5.284774475706789 | Elapsed time: 446.34987616539
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_096_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 97 | Training loss: 5.273112415077682 | Elapsed time: 446.5175247192383
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_097_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 98 | Training loss: 5.266834392281112 | Elapsed time: 446.6591956615448
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_098_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 99 | Training loss: 5.263471619573658 | Elapsed time: 446.5839567184448
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_099_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 100 | Training loss: 5.266524955422103 | Elapsed time: 446.8115346431732
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_100_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 101 | Training loss: 5.255535310375953 | Elapsed time: 446.39881134033203
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_101_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 102 | Training loss: 5.253226997847567 | Elapsed time: 446.5249252319336
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_102_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 103 | Training loss: 5.241266271549309 | Elapsed time: 446.64659690856934
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_103_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 104 | Training loss: 5.237880653487946 | Elapsed time: 446.53013253211975
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_104_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 105 | Training loss: 5.237486109286249 | Elapsed time: 446.4495050907135
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_105_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 106 | Training loss: 5.232164034586467 | Elapsed time: 446.50557494163513
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_106_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 107 | Training loss: 5.222232354139377 | Elapsed time: 446.3275954723358
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_107_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 108 | Training loss: 5.216840058743597 | Elapsed time: 446.5570592880249
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_108_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 109 | Training loss: 5.208461410271194 | Elapsed time: 446.6285765171051
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_109_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 110 | Training loss: 5.202561829618351 | Elapsed time: 446.64031648635864
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_110_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 111 | Training loss: 5.208137091524349 | Elapsed time: 446.3827438354492
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_111_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 112 | Training loss: 5.209035732551011 | Elapsed time: 446.6988868713379
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_112_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 113 | Training loss: 5.20015106848376 | Elapsed time: 446.5032410621643
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_113_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 114 | Training loss: 5.184763904579147 | Elapsed time: 446.42046642303467
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_114_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 115 | Training loss: 5.186899144254521 | Elapsed time: 446.53126072883606
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_115_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 116 | Training loss: 5.180826071969525 | Elapsed time: 446.4869029521942
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_116_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 117 | Training loss: 5.172246459953324 | Elapsed time: 446.70859694480896
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_117_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 118 | Training loss: 5.170070183729221 | Elapsed time: 446.5050280094147
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_118_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 119 | Training loss: 5.172051846624135 | Elapsed time: 446.36170291900635
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_119_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 120 | Training loss: 5.164714810377109 | Elapsed time: 446.484014749527
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_120_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 121 | Training loss: 5.149690976399861 | Elapsed time: 446.4222719669342
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_121_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 122 | Training loss: 5.161837729151378 | Elapsed time: 446.5930025577545
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_122_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 123 | Training loss: 5.1411697954950695 | Elapsed time: 446.5001130104065
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_123_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 124 | Training loss: 5.13912796260354 | Elapsed time: 446.6390190124512
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_124_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 125 | Training loss: 5.138137930643535 | Elapsed time: 446.5836980342865
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_125_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 126 | Training loss: 5.134608922604316 | Elapsed time: 446.7447738647461
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_126_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 127 | Training loss: 5.135252985887661 | Elapsed time: 446.547687292099
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_127_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 128 | Training loss: 5.130611571009288 | Elapsed time: 446.5457327365875
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_128_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 129 | Training loss: 5.119773346031021 | Elapsed time: 446.5813000202179
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_129_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 130 | Training loss: 5.11265703113731 | Elapsed time: 446.55277705192566
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_130_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 131 | Training loss: 5.1103095903605995 | Elapsed time: 446.46068263053894
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_131_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 132 | Training loss: 5.101351796985862 | Elapsed time: 446.5481126308441
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_132_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 133 | Training loss: 5.0976872786790315 | Elapsed time: 446.2222812175751
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_133_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 134 | Training loss: 5.100807349838896 | Elapsed time: 446.6008059978485
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_134_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 135 | Training loss: 5.093894043844379 | Elapsed time: 446.59854912757874
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_135_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 136 | Training loss: 5.0874139981831386 | Elapsed time: 446.5280866622925
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_136_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 137 | Training loss: 5.083437880594097 | Elapsed time: 446.5345916748047
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_137_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 138 | Training loss: 5.084548791249593 | Elapsed time: 446.4769551753998
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_138_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 139 | Training loss: 5.08713896783764 | Elapsed time: 446.47315406799316
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_139_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 140 | Training loss: 5.070500964889983 | Elapsed time: 446.478312253952
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_140_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 141 | Training loss: 5.071859649079527 | Elapsed time: 446.53048157691956
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_141_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 142 | Training loss: 5.072785209039014 | Elapsed time: 446.5794379711151
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_142_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 143 | Training loss: 5.061541929454385 | Elapsed time: 446.4052879810333
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_143_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 144 | Training loss: 5.056922807902871 | Elapsed time: 446.36341667175293
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_144_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 145 | Training loss: 5.060538934376425 | Elapsed time: 446.165810585022
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_145_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 146 | Training loss: 5.057816811902319 | Elapsed time: 446.41727209091187
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_146_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 147 | Training loss: 5.050764794835073 | Elapsed time: 446.583135843277
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_147_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 148 | Training loss: 5.04246533654645 | Elapsed time: 446.53639459609985
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_148_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 149 | Training loss: 5.032791934327451 | Elapsed time: 446.811341047287
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_149_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 150 | Training loss: 5.030359933476248 | Elapsed time: 446.46685695648193
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_150_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 151 | Training loss: 5.032487726497079 | Elapsed time: 446.47074484825134
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_151_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 152 | Training loss: 5.024114595439857 | Elapsed time: 446.44570684432983
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_152_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 153 | Training loss: 5.028684770275733 | Elapsed time: 446.4385211467743
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_153_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 154 | Training loss: 5.020854159029658 | Elapsed time: 446.4909851551056
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_154_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 155 | Training loss: 5.015135741281414 | Elapsed time: 446.429758310318
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_155_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 156 | Training loss: 5.016808861981847 | Elapsed time: 446.5506258010864
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_156_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 157 | Training loss: 5.012621469364433 | Elapsed time: 446.60350036621094
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_157_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 158 | Training loss: 5.005037530453619 | Elapsed time: 446.37278270721436
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_158_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 159 | Training loss: 5.006857350438893 | Elapsed time: 446.63547348976135
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_159_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 160 | Training loss: 5.000944343155729 | Elapsed time: 446.47282457351685
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_160_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 161 | Training loss: 5.002537342840564 | Elapsed time: 446.49424481391907
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_161_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 162 | Training loss: 5.0048723858511615 | Elapsed time: 446.4727430343628
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_162_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 163 | Training loss: 4.986107316083775 | Elapsed time: 446.55107522010803
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_163_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 164 | Training loss: 4.98010977347216 | Elapsed time: 446.5054647922516
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_164_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 165 | Training loss: 4.9797894692944435 | Elapsed time: 446.2857701778412
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_165_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 166 | Training loss: 4.970546591067743 | Elapsed time: 446.427659034729
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_166_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 167 | Training loss: 4.978284227633905 | Elapsed time: 446.25680899620056
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_167_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 168 | Training loss: 4.970927132817799 | Elapsed time: 446.4250228404999
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_168_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 169 | Training loss: 4.962742638921071 | Elapsed time: 446.4886622428894
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_169_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 170 | Training loss: 4.965734242917059 | Elapsed time: 446.4931056499481
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_170_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 171 | Training loss: 4.9620063185929775 | Elapsed time: 446.41749715805054
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_171_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 172 | Training loss: 4.96497626028613 | Elapsed time: 446.66369795799255
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_172_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 173 | Training loss: 4.960228634451678 | Elapsed time: 446.67620611190796
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_173_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 174 | Training loss: 4.956287600085169 | Elapsed time: 446.589307308197
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_174_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 175 | Training loss: 4.945075490041646 | Elapsed time: 446.5921039581299
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_175_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 176 | Training loss: 4.936810024246246 | Elapsed time: 446.41429710388184
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_176_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 177 | Training loss: 4.941316371430418 | Elapsed time: 446.5008547306061
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_177_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 178 | Training loss: 4.937057132492522 | Elapsed time: 446.4843611717224
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_178_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 179 | Training loss: 4.938810555045 | Elapsed time: 446.92725896835327
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_179_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 180 | Training loss: 4.943496357657001 | Elapsed time: 446.51037073135376
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_180_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 181 | Training loss: 4.936356372224119 | Elapsed time: 446.6137490272522
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_181_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 182 | Training loss: 4.924862990122356 | Elapsed time: 446.7020072937012
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_182_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 183 | Training loss: 4.9229664897728345 | Elapsed time: 446.2006058692932
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_183_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 184 | Training loss: 4.928866616742102 | Elapsed time: 446.5794720649719
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_184_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 185 | Training loss: 4.922529528003015 | Elapsed time: 446.4739592075348
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_185_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 186 | Training loss: 4.90685538902968 | Elapsed time: 446.55876898765564
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_186_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 187 | Training loss: 4.912347830698161 | Elapsed time: 446.4477803707123
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_187_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 188 | Training loss: 4.906415610970138 | Elapsed time: 446.6924750804901
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_188_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 189 | Training loss: 4.905801264825695 | Elapsed time: 446.4118595123291
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_189_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 190 | Training loss: 4.901555731386957 | Elapsed time: 446.44686818122864
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_190_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 191 | Training loss: 4.902420015392189 | Elapsed time: 446.4947054386139
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_191_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 192 | Training loss: 4.899651957605175 | Elapsed time: 446.5274157524109
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_192_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 193 | Training loss: 4.8943827137975635 | Elapsed time: 446.68523645401
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_193_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 194 | Training loss: 4.895509021248884 | Elapsed time: 446.9319167137146
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_194_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 195 | Training loss: 4.884021680036229 | Elapsed time: 446.4286003112793
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_195_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 196 | Training loss: 4.880214611213364 | Elapsed time: 446.6331889629364
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_196_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 197 | Training loss: 4.881232708989979 | Elapsed time: 446.3934819698334
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_197_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 198 | Training loss: 4.884239900135946 | Elapsed time: 446.49623465538025
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_198_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 199 | Training loss: 4.885690037123934 | Elapsed time: 446.6739070415497
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_199_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 200 | Training loss: 4.87081289481736 | Elapsed time: 446.43318915367126
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_200_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 201 | Training loss: 4.8784590671638295 | Elapsed time: 446.43744802474976
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_201_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 202 | Training loss: 4.869548059985071 | Elapsed time: 446.3968939781189
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_202_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 203 | Training loss: 4.862007838761259 | Elapsed time: 446.4935300350189
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_203_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 204 | Training loss: 4.86219982520311 | Elapsed time: 446.52201223373413
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_204_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 205 | Training loss: 4.865887713289546 | Elapsed time: 446.58243060112
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_205_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 206 | Training loss: 4.857351989327315 | Elapsed time: 446.6824481487274
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_206_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 207 | Training loss: 4.864123796512505 | Elapsed time: 446.40831232070923
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_207_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 208 | Training loss: 4.8548495974131445 | Elapsed time: 446.6814706325531
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_208_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 209 | Training loss: 4.848059782724895 | Elapsed time: 446.6260824203491
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_209_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 210 | Training loss: 4.851716256665137 | Elapsed time: 446.60520696640015
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_210_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 211 | Training loss: 4.849057347950583 | Elapsed time: 446.75018525123596
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_211_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 212 | Training loss: 4.8364508527957515 | Elapsed time: 446.3601517677307
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_212_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 213 | Training loss: 4.845736318005773 | Elapsed time: 446.4241695404053
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_213_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 214 | Training loss: 4.838569034835298 | Elapsed time: 446.5815005302429
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_214_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 215 | Training loss: 4.831241166996147 | Elapsed time: 446.6450252532959
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_215_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
499
+ Epoch: 216 | Training loss: 4.840987417750254 | Elapsed time: 446.5573434829712
500
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_216_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
501
+ Epoch: 217 | Training loss: 4.83956558547334 | Elapsed time: 446.55172181129456
502
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_217_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
503
+ Epoch: 218 | Training loss: 4.82624133713469 | Elapsed time: 446.5181736946106
504
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_218_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
505
+ Epoch: 219 | Training loss: 4.820723751585879 | Elapsed time: 446.6336796283722
506
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_219_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
507
+ Epoch: 220 | Training loss: 4.836335638088142 | Elapsed time: 446.4615762233734
508
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_220_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
509
+ Epoch: 221 | Training loss: 4.821460673433102 | Elapsed time: 446.55925583839417
510
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_221_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
511
+ Epoch: 222 | Training loss: 4.824226751536904 | Elapsed time: 446.6058750152588
512
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_222_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
513
+ Epoch: 223 | Training loss: 4.8224912108537445 | Elapsed time: 446.584876537323
514
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_223_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
515
+ Epoch: 224 | Training loss: 4.813569323983259 | Elapsed time: 446.56761360168457
516
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_224_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
517
+ Epoch: 225 | Training loss: 4.813728448635565 | Elapsed time: 446.5501401424408
518
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_225_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
519
+ Epoch: 226 | Training loss: 4.809035680965035 | Elapsed time: 446.4956920146942
520
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_226_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
521
+ Epoch: 227 | Training loss: 4.8052766746627595 | Elapsed time: 446.54684233665466
522
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_227_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
523
+ Epoch: 228 | Training loss: 4.813008650096353 | Elapsed time: 446.4580159187317
524
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_228_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
525
+ Epoch: 229 | Training loss: 4.803138717681824 | Elapsed time: 446.58640217781067
526
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_229_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
527
+ Epoch: 230 | Training loss: 4.799989308187824 | Elapsed time: 446.41652607917786
528
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_230_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
529
+ Epoch: 231 | Training loss: 4.800323698573008 | Elapsed time: 446.492219209671
530
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_231_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
531
+ Epoch: 232 | Training loss: 4.7899678196021895 | Elapsed time: 446.5756027698517
532
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_232_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
533
+ Epoch: 233 | Training loss: 4.788998596206635 | Elapsed time: 446.57351565361023
534
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_233_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
535
+ Epoch: 234 | Training loss: 4.7846281685515075 | Elapsed time: 446.5464389324188
536
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_234_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
537
+ Epoch: 235 | Training loss: 4.794743030609009 | Elapsed time: 446.49897813796997
538
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_235_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
539
+ Epoch: 236 | Training loss: 4.7895377267620525 | Elapsed time: 446.5257842540741
540
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_236_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
541
+ Epoch: 237 | Training loss: 4.797935851319822 | Elapsed time: 446.26309084892273
542
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_237_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
543
+ Epoch: 238 | Training loss: 4.790795013100325 | Elapsed time: 446.3788664340973
544
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_238_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
545
+ Epoch: 239 | Training loss: 4.78405855087463 | Elapsed time: 446.4530870914459
546
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_239_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
547
+ Epoch: 240 | Training loss: 4.779900985801529 | Elapsed time: 446.43403244018555
548
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_240_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
549
+ Epoch: 241 | Training loss: 4.7792105627155115 | Elapsed time: 446.32818126678467
550
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_241_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
551
+ Epoch: 242 | Training loss: 4.773524209172901 | Elapsed time: 446.5482814311981
552
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_242_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
553
+ Epoch: 243 | Training loss: 4.772514941925536 | Elapsed time: 446.7249581813812
554
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_243_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
555
+ Epoch: 244 | Training loss: 4.77109370831244 | Elapsed time: 446.51592993736267
556
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_244_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
557
+ Epoch: 245 | Training loss: 4.760093645183388 | Elapsed time: 446.4663062095642
558
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_245_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
559
+ Epoch: 246 | Training loss: 4.760649909516294 | Elapsed time: 446.3416225910187
560
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_246_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
561
+ Epoch: 247 | Training loss: 4.766211241305231 | Elapsed time: 446.4574553966522
562
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_247_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
563
+ Epoch: 248 | Training loss: 4.760945586625211 | Elapsed time: 446.44516491889954
564
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_248_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
565
+ Epoch: 249 | Training loss: 4.762481148847325 | Elapsed time: 446.28744411468506
566
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_249_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
567
+ Epoch: 250 | Training loss: 4.754441808559699 | Elapsed time: 446.4202184677124
568
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_250_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
569
+ Epoch: 251 | Training loss: 4.7546881753765415 | Elapsed time: 446.68989157676697
570
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_251_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
571
+ Epoch: 252 | Training loss: 4.757867058356127 | Elapsed time: 446.34573674201965
572
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_252_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
573
+ Epoch: 253 | Training loss: 4.742248494230107 | Elapsed time: 446.53063702583313
574
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_253_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
575
+ Epoch: 254 | Training loss: 4.7443302624715775 | Elapsed time: 446.6658959388733
576
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_254_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
577
+ Epoch: 255 | Training loss: 4.753744191038394 | Elapsed time: 446.57498955726624
578
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_255_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
579
+ Epoch: 256 | Training loss: 4.742700941310433 | Elapsed time: 446.38922786712646
580
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_256_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
581
+ Epoch: 257 | Training loss: 4.74141551396566 | Elapsed time: 446.4317831993103
582
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_257_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
583
+ Epoch: 258 | Training loss: 4.735845487750695 | Elapsed time: 446.58635091781616
584
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_258_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
585
+ Epoch: 259 | Training loss: 4.736322201178697 | Elapsed time: 446.28218507766724
586
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_259_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
587
+ Epoch: 260 | Training loss: 4.7381404831023985 | Elapsed time: 446.4551799297333
588
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_260_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
589
+ Epoch: 261 | Training loss: 4.730437982106161 | Elapsed time: 446.2397994995117
590
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_261_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
591
+ Epoch: 262 | Training loss: 4.731532865893579 | Elapsed time: 446.6462650299072
592
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_262_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
593
+ Epoch: 263 | Training loss: 4.733560082441318 | Elapsed time: 446.5397388935089
594
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_263_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
595
+ Epoch: 264 | Training loss: 4.73039195542326 | Elapsed time: 446.5376753807068
596
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_264_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
597
+ Epoch: 265 | Training loss: 4.720909685907726 | Elapsed time: 446.64150738716125
598
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_265_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
599
+ Epoch: 266 | Training loss: 4.729083671303329 | Elapsed time: 446.5861065387726
600
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_266_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
601
+ Epoch: 267 | Training loss: 4.714321835074358 | Elapsed time: 446.59493112564087
602
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_267_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
603
+ Epoch: 268 | Training loss: 4.720631594667416 | Elapsed time: 446.277215719223
604
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_268_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
605
+ Epoch: 269 | Training loss: 4.715209013925579 | Elapsed time: 446.4296028614044
606
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_269_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
607
+ Epoch: 270 | Training loss: 4.713364458369638 | Elapsed time: 446.32166028022766
608
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_270_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
609
+ Epoch: 271 | Training loss: 4.710538854618035 | Elapsed time: 446.4028706550598
610
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_271_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
611
+ Epoch: 272 | Training loss: 4.7116195770080935 | Elapsed time: 446.494181394577
612
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_272_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
613
+ Epoch: 273 | Training loss: 4.71744314258446 | Elapsed time: 446.5525999069214
614
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_273_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
615
+ Epoch: 274 | Training loss: 4.706201235453288 | Elapsed time: 446.476459980011
616
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_274_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
617
+ Epoch: 275 | Training loss: 4.7047474607974 | Elapsed time: 446.36352348327637
618
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_275_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
619
+ Epoch: 276 | Training loss: 4.7095904435940135 | Elapsed time: 446.4227886199951
620
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_276_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
621
+ Epoch: 277 | Training loss: 4.698420522693627 | Elapsed time: 446.30607080459595
622
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_277_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
623
+ Epoch: 278 | Training loss: 4.699224645268179 | Elapsed time: 446.63017868995667
624
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_278_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
625
+ Epoch: 279 | Training loss: 4.702050812468081 | Elapsed time: 446.67204689979553
626
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_279_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
627
+ Epoch: 280 | Training loss: 4.6960384346053985 | Elapsed time: 446.58084297180176
628
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_280_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
629
+ Epoch: 281 | Training loss: 4.6933618324721404 | Elapsed time: 446.5077290534973
630
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_281_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
631
+ Epoch: 282 | Training loss: 4.690649377133794 | Elapsed time: 446.5900378227234
632
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_282_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
633
+ Epoch: 283 | Training loss: 4.687035040941067 | Elapsed time: 446.5930004119873
634
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_283_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
635
+ Epoch: 284 | Training loss: 4.679940796659854 | Elapsed time: 446.4246597290039
636
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_284_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
637
+ Epoch: 285 | Training loss: 4.68046858781826 | Elapsed time: 446.3603096008301
638
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_285_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
639
+ Epoch: 286 | Training loss: 4.679989604416959 | Elapsed time: 446.55233788490295
640
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_286_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
641
+ Epoch: 287 | Training loss: 4.684978662136786 | Elapsed time: 446.39942955970764
642
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_287_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
643
+ Epoch: 288 | Training loss: 4.676942589278231 | Elapsed time: 446.4226689338684
644
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_288_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
645
+ Epoch: 289 | Training loss: 4.670525197735327 | Elapsed time: 446.49077010154724
646
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_289_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
647
+ Epoch: 290 | Training loss: 4.6771681541930175 | Elapsed time: 446.3869013786316
648
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_290_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
649
+ Epoch: 291 | Training loss: 4.671153043796441 | Elapsed time: 446.5442671775818
650
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_291_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
651
+ Epoch: 292 | Training loss: 4.667791839607223 | Elapsed time: 446.4855365753174
652
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_292_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
653
+ Epoch: 293 | Training loss: 4.672721196553426 | Elapsed time: 446.77307772636414
654
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_293_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
655
+ Epoch: 294 | Training loss: 4.671959221244096 | Elapsed time: 446.42590045928955
656
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_294_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
657
+ Epoch: 295 | Training loss: 4.673462619324644 | Elapsed time: 446.50548672676086
658
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_295_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
659
+ Epoch: 296 | Training loss: 4.6726811841100515 | Elapsed time: 446.6944832801819
660
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_296_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
661
+ Epoch: 297 | Training loss: 4.668021327721145 | Elapsed time: 446.4713532924652
662
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_297_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
663
+ Epoch: 298 | Training loss: 4.6697016618922795 | Elapsed time: 446.441410779953
664
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_298_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
665
+ Epoch: 299 | Training loss: 4.669370149661919 | Elapsed time: 446.5139889717102
666
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_299_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
667
+ Epoch: 300 | Training loss: 4.663941831645851 | Elapsed time: 446.5561776161194
668
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_300_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
669
+ Epoch: 301 | Training loss: 4.6637854737911875 | Elapsed time: 446.68469285964966
670
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_301_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
671
+ Epoch: 302 | Training loss: 4.645583058545689 | Elapsed time: 446.36661648750305
672
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_302_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
673
+ Epoch: 303 | Training loss: 4.651561250705681 | Elapsed time: 446.7886209487915
674
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_303_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
675
+ Epoch: 304 | Training loss: 4.6523869813321355 | Elapsed time: 446.5078628063202
676
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_304_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
677
+ Epoch: 305 | Training loss: 4.650938758355177 | Elapsed time: 446.5429034233093
678
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_305_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
679
+ Epoch: 306 | Training loss: 4.653626910226787 | Elapsed time: 446.2524092197418
680
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_306_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
681
+ Epoch: 307 | Training loss: 4.650223275144657 | Elapsed time: 446.5203149318695
682
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_307_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
683
+ Epoch: 308 | Training loss: 4.653562509609078 | Elapsed time: 446.59948205947876
684
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_308_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
685
+ Epoch: 309 | Training loss: 4.64235200425108 | Elapsed time: 446.5511300563812
686
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_309_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
687
+ Epoch: 310 | Training loss: 4.646510617223805 | Elapsed time: 446.4342336654663
688
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_310_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
689
+ Epoch: 311 | Training loss: 4.640764667602356 | Elapsed time: 446.53635025024414
690
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_311_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
691
+ Epoch: 312 | Training loss: 4.641510570358611 | Elapsed time: 446.3814344406128
692
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_312_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
693
+ Epoch: 313 | Training loss: 4.638994779415473 | Elapsed time: 446.5864017009735
694
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_313_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
695
+ Epoch: 314 | Training loss: 4.634196266204773 | Elapsed time: 446.47272419929504
696
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_314_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
697
+ Epoch: 315 | Training loss: 4.6343707248360335 | Elapsed time: 446.67076659202576
698
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_315_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
699
+ Epoch: 316 | Training loss: 4.629071268016944 | Elapsed time: 446.4249906539917
700
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_316_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
701
+ Epoch: 317 | Training loss: 4.628696033340728 | Elapsed time: 446.41057085990906
702
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_317_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
703
+ Epoch: 318 | Training loss: 4.630949092720321 | Elapsed time: 446.4318196773529
704
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_318_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
705
+ Epoch: 319 | Training loss: 4.627474035807475 | Elapsed time: 446.3962540626526
706
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_319_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
707
+ Epoch: 320 | Training loss: 4.6249794503172 | Elapsed time: 446.47475838661194
708
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_320_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
709
+ Epoch: 321 | Training loss: 4.629168915891362 | Elapsed time: 446.4166913032532
710
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_321_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
711
+ Epoch: 322 | Training loss: 4.627541475429268 | Elapsed time: 446.43110871315
712
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_322_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
713
+ Epoch: 323 | Training loss: 4.621661218578468 | Elapsed time: 446.3329448699951
714
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_323_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
715
+ Epoch: 324 | Training loss: 4.620028126501514 | Elapsed time: 446.7637515068054
716
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_324_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
717
+ Epoch: 325 | Training loss: 4.6172513400247235 | Elapsed time: 446.3513734340668
718
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_325_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
719
+ Epoch: 326 | Training loss: 4.6199183930417975 | Elapsed time: 446.46453166007996
720
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_326_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
721
+ Epoch: 327 | Training loss: 4.619718764832395 | Elapsed time: 446.5673861503601
722
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_327_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
723
+ Epoch: 328 | Training loss: 4.612031961391548 | Elapsed time: 446.39114022254944
724
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_328_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
725
+ Epoch: 329 | Training loss: 4.617482111125649 | Elapsed time: 446.68063139915466
726
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_329_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
727
+ Epoch: 330 | Training loss: 4.604638452777368 | Elapsed time: 446.3780126571655
728
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_330_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
729
+ Epoch: 331 | Training loss: 4.60623934740078 | Elapsed time: 446.3321888446808
730
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_331_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
731
+ Epoch: 332 | Training loss: 4.610434230454191 | Elapsed time: 446.46926951408386
732
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_332_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
733
+ Epoch: 333 | Training loss: 4.603401630462525 | Elapsed time: 446.4835002422333
734
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_333_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
735
+ Epoch: 334 | Training loss: 4.6028220525044885 | Elapsed time: 446.28473448753357
736
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_334_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
737
+ Epoch: 335 | Training loss: 4.602875295513404 | Elapsed time: 446.4940302371979
738
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_335_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
739
+ Epoch: 336 | Training loss: 4.602786974992581 | Elapsed time: 446.333571434021
740
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_336_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
741
+ Epoch: 337 | Training loss: 4.601064914238905 | Elapsed time: 446.3235867023468
742
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_337_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
743
+ Epoch: 338 | Training loss: 4.595411247360016 | Elapsed time: 446.5643367767334
744
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_338_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
745
+ Epoch: 339 | Training loss: 4.600766887207945 | Elapsed time: 446.5149157047272
746
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_339_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
747
+ Epoch: 340 | Training loss: 4.597100349243529 | Elapsed time: 446.74661207199097
748
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_340_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
749
+ Epoch: 341 | Training loss: 4.593915541490871 | Elapsed time: 446.5706329345703
750
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_341_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
751
+ Epoch: 342 | Training loss: 4.592311059643409 | Elapsed time: 446.50575160980225
752
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_342_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
753
+ Epoch: 343 | Training loss: 4.5963724587491885 | Elapsed time: 446.46148800849915
754
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_343_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
755
+ Epoch: 344 | Training loss: 4.586942980151452 | Elapsed time: 446.42925548553467
756
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_344_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
757
+ Epoch: 345 | Training loss: 4.585597702605043 | Elapsed time: 446.68283772468567
758
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_345_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
759
+ Epoch: 346 | Training loss: 4.581499073081863 | Elapsed time: 446.6251368522644
760
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_346_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
761
+ Epoch: 347 | Training loss: 4.592227443725525 | Elapsed time: 446.34901785850525
762
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_347_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
763
+ Epoch: 348 | Training loss: 4.5861249207974435 | Elapsed time: 446.66551327705383
764
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_348_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
765
+ Epoch: 349 | Training loss: 4.586406044379442 | Elapsed time: 446.4297630786896
766
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_349_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
767
+ Epoch: 350 | Training loss: 4.57438800054158 | Elapsed time: 446.52056908607483
768
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_350_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
769
+ Epoch: 351 | Training loss: 4.586202912701818 | Elapsed time: 446.49175572395325
770
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_351_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
771
+ Epoch: 352 | Training loss: 4.579316108764527 | Elapsed time: 446.4484934806824
772
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_352_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
773
+ Epoch: 353 | Training loss: 4.580143793376382 | Elapsed time: 446.54468727111816
774
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_353_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
775
+ Epoch: 354 | Training loss: 4.570534638539998 | Elapsed time: 446.4549820423126
776
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_354_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
777
+ Epoch: 355 | Training loss: 4.572795209294545 | Elapsed time: 446.6251890659332
778
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_355_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
779
+ Epoch: 356 | Training loss: 4.5756774148541295 | Elapsed time: 446.6513035297394
780
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_356_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
781
+ Epoch: 357 | Training loss: 4.568796239689201 | Elapsed time: 446.4179563522339
782
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_357_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
783
+ Epoch: 358 | Training loss: 4.57147574281978 | Elapsed time: 446.43471693992615
784
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_358_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
785
+ Epoch: 359 | Training loss: 4.57370044894799 | Elapsed time: 446.36478090286255
786
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_359_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
787
+ Epoch: 360 | Training loss: 4.568556894085364 | Elapsed time: 448.68083143234253
788
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_360_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
789
+ Epoch: 361 | Training loss: 4.570777821683598 | Elapsed time: 446.4294321537018
790
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_361_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
791
+ Epoch: 362 | Training loss: 4.56642510457905 | Elapsed time: 446.3972907066345
792
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_362_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
793
+ Epoch: 363 | Training loss: 4.560708181110923 | Elapsed time: 446.37623047828674
794
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_363_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
795
+ Epoch: 364 | Training loss: 4.561449929387745 | Elapsed time: 446.5511381626129
796
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_364_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
797
+ Epoch: 365 | Training loss: 4.560856864837829 | Elapsed time: 446.3412301540375
798
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_365_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
799
+ Epoch: 366 | Training loss: 4.553440024514874 | Elapsed time: 446.44695234298706
800
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_366_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
801
+ Epoch: 367 | Training loss: 4.5533762244645235 | Elapsed time: 446.5055546760559
802
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_367_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
803
+ Epoch: 368 | Training loss: 4.552586954272912 | Elapsed time: 446.5034854412079
804
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_368_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
805
+ Epoch: 369 | Training loss: 4.55600472505459 | Elapsed time: 446.31446599960327
806
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_369_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
807
+ Epoch: 370 | Training loss: 4.552960763196507 | Elapsed time: 446.49599838256836
808
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_370_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
809
+ Epoch: 371 | Training loss: 4.543348848224876 | Elapsed time: 446.58443450927734
810
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_371_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
811
+ Epoch: 372 | Training loss: 4.542427336146494 | Elapsed time: 446.6605155467987
812
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_372_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
813
+ Epoch: 373 | Training loss: 4.547465552826841 | Elapsed time: 446.3920512199402
814
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_373_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
815
+ Epoch: 374 | Training loss: 4.552485851470582 | Elapsed time: 446.4593062400818
816
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_374_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
817
+ Epoch: 375 | Training loss: 4.547458614417892 | Elapsed time: 446.28450107574463
818
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_375_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
819
+ Epoch: 376 | Training loss: 4.545775563892966 | Elapsed time: 446.5002989768982
820
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_376_imagenet_10_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
821
+ slurmstepd: error: *** STEP 26857078.0 ON ga002 CANCELLED AT 2022-11-11T12:50:44 DUE TO TIME LIMIT ***
822
+ slurmstepd: error: *** JOB 26857078 ON ga002 CANCELLED AT 2022-11-11T12:50:44 DUE TO TIME LIMIT ***
823
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
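The run above was killed at the Slurm time limit after epoch 376. The imagenet_alef_1.out log further below shows how such runs continue: the job is resubmitted with --resume pointing at the saved .pt file, and the script reloads both model weights and optimizer state. A minimal resume sketch, assuming a checkpoint dict with "model_state_dict" and "optimizer_state_dict" keys (the actual key names are not visible in these logs):

import torch

def load_checkpoint(path, model, optimizer):
    # Restore weights and optimizer state so training picks up where it stopped.
    ckpt = torch.load(path, map_location="cpu")
    model.load_state_dict(ckpt["model_state_dict"])
    optimizer.load_state_dict(ckpt["optimizer_state_dict"])

Note that the epoch counter restarts from 0 after a resume, as the resumed run below shows.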
logs/imagenet_1_gimel_0.out ADDED
The diff for this file is too large to render.
logs/imagenet_alef_0.out ADDED
@@ -0,0 +1,129 @@
1
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_alef', vocab_size=16384, block_size=255, batch_size=128, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
2
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_alef', vocab_size=16384, block_size=255, batch_size=128, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
3
+ model:
4
+ base_learning_rate: 4.5e-06
5
+ params:
6
+ ddconfig:
7
+ attn_resolutions:
8
+ - 16
9
+ ch: 128
10
+ ch_mult:
11
+ - 1
12
+ - 1
13
+ - 2
14
+ - 2
15
+ - 4
16
+ double_z: false
17
+ dropout: 0.0
18
+ in_channels: 3
19
+ num_res_blocks: 2
20
+ out_ch: 3
21
+ resolution: 256
22
+ z_channels: 256
23
+ embed_dim: 256
24
+ lossconfig:
25
+ params:
26
+ codebook_weight: 1.0
27
+ disc_conditional: false
28
+ disc_in_channels: 3
29
+ disc_num_layers: 2
30
+ disc_start: 0
31
+ disc_weight: 0.75
32
+ target: vqloss.VQLPIPSWithDiscriminator
33
+ monitor: val/rec_loss
34
+ n_embed: 16384
35
+ target: vqmodel.VQModel
36
+
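The YAML echoed above is the VQGAN configuration. A minimal sketch of how a taming-transformers-style config can be loaded and turned into a model (the helper mirrors that codebase's instantiate_from_config pattern; the filename comes from the vqconfig_path argument):

import importlib
from omegaconf import OmegaConf

def instantiate_from_config(config):
    # Resolve the dotted "target" path and construct the class with "params".
    module, cls = config["target"].rsplit(".", 1)
    return getattr(importlib.import_module(module), cls)(**config.get("params", dict()))

config = OmegaConf.load("imagenet_16x16_16384.yaml")
vq_model = instantiate_from_config(config.model)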
37
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
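The arithmetic behind that shape, and how it lines up with the GPT's block_size of 255 (a consistency check, not code from the repo):

# 256-channel latents on a 16x16 grid: 256 * 16 * 16 = 65536 dimensions.
# After quantization each grid cell becomes one of 16384 codebook indices,
# so an image is a 16 * 16 = 256-token sequence; with block_size=255 the
# GPT conditions on at most the 255 preceding tokens to predict the next.
assert 256 * 16 * 16 == 65536
assert 16 * 16 == 255 + 1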
38
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
39
+ VQLPIPSWithDiscriminator running with hinge loss.
40
+ Loaded VQ encoder.
41
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
42
+ Number of parameters: 110417664
43
+ Running on 2 GPUs total
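Those numbers are consistent with batch_size=128 being the per-GPU batch, giving an effective batch of 256 images per iteration (a sanity check under that assumption):

import math
# 1,281,167 images / (128 per GPU * 2 GPUs) = 5004.56..., rounded up.
assert math.ceil(1_281_167 / (128 * 2)) == 5005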
44
+ => no checkpoint loaded, will train from scratch
45
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
46
+ warnings.warn(warning.format(ret))
47
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
48
+ warnings.warn(warning.format(ret))
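The repeated UserWarning is PyTorch's deprecation notice for the size_average/reduce loss arguments; requesting the reduction explicitly avoids it. A minimal illustration with hypothetical shapes (the actual loss call in the training script is not shown in these logs):

import torch
import torch.nn.functional as F

logits = torch.randn(8, 16384)             # hypothetical batch of token logits
targets = torch.randint(0, 16384, (8,))    # matching codebook-index targets
per_token_loss = F.cross_entropy(logits, targets, reduction="none")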
49
+ Epoch: 0 | Training loss: 6.128626347826673 | Elapsed time: 4421.352100610733
50
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
51
+ Epoch: 1 | Training loss: 5.8819179781667 | Elapsed time: 4417.959035873413
52
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
53
+ Epoch: 2 | Training loss: 5.814631825179368 | Elapsed time: 4418.510634183884
54
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
55
+ Epoch: 3 | Training loss: 5.773755791518357 | Elapsed time: 4418.096048593521
56
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
57
+ Epoch: 4 | Training loss: 5.746192256554023 | Elapsed time: 4417.264495372772
58
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
59
+ Epoch: 5 | Training loss: 5.723566655131368 | Elapsed time: 4418.32728767395
60
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
61
+ Epoch: 6 | Training loss: 5.70641222790881 | Elapsed time: 4417.584972858429
62
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
63
+ Epoch: 7 | Training loss: 5.6919463964609 | Elapsed time: 4418.683149814606
64
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
65
+ Epoch: 8 | Training loss: 5.68068699155535 | Elapsed time: 4418.64931344986
66
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
67
+ Epoch: 9 | Training loss: 5.669378303600239 | Elapsed time: 4419.641861200333
68
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
69
+ Epoch: 10 | Training loss: 5.661288778074495 | Elapsed time: 4418.546216726303
70
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
71
+ Epoch: 11 | Training loss: 5.6522860949094245 | Elapsed time: 4416.802042007446
72
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
73
+ Epoch: 12 | Training loss: 5.645374170812098 | Elapsed time: 4418.905344009399
74
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
75
+ Epoch: 13 | Training loss: 5.638707668845589 | Elapsed time: 4417.443339347839
76
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
77
+ Epoch: 14 | Training loss: 5.633227064226057 | Elapsed time: 4416.635406494141
78
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
79
+ Epoch: 15 | Training loss: 5.628721609887305 | Elapsed time: 4417.910185098648
80
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
81
+ Epoch: 16 | Training loss: 5.623982014784684 | Elapsed time: 4416.0065932273865
82
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
83
+ Epoch: 17 | Training loss: 5.618714102498301 | Elapsed time: 4419.553871631622
84
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
85
+ Epoch: 18 | Training loss: 5.615540227499399 | Elapsed time: 4420.723339796066
86
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
87
+ Epoch: 19 | Training loss: 5.612478973910763 | Elapsed time: 4420.372958898544
88
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
89
+ Epoch: 20 | Training loss: 5.607777811787821 | Elapsed time: 4419.815778970718
90
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
91
+ Epoch: 21 | Training loss: 5.6048696346454445 | Elapsed time: 4420.67625617981
92
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
93
+ Epoch: 22 | Training loss: 5.601634475925228 | Elapsed time: 4418.33234500885
94
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
95
+ Epoch: 23 | Training loss: 5.599205733536483 | Elapsed time: 4420.177897930145
96
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
97
+ Epoch: 24 | Training loss: 5.5956090254502575 | Elapsed time: 4422.450205564499
98
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
99
+ Epoch: 25 | Training loss: 5.593091600877303 | Elapsed time: 4420.362089633942
100
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
101
+ Epoch: 26 | Training loss: 5.590661748091539 | Elapsed time: 4420.89226937294
102
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
103
+ Epoch: 27 | Training loss: 5.589152030487518 | Elapsed time: 4419.890937328339
104
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
105
+ Epoch: 28 | Training loss: 5.586265545672589 | Elapsed time: 4422.632033824921
106
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
107
+ Epoch: 29 | Training loss: 5.5847198278634815 | Elapsed time: 4420.503535032272
108
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
109
+ Epoch: 30 | Training loss: 5.581631250886412 | Elapsed time: 4420.0441801548
110
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
111
+ Epoch: 31 | Training loss: 5.579172412308303 | Elapsed time: 4419.419838666916
112
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
113
+ Epoch: 32 | Training loss: 5.577459043222707 | Elapsed time: 4418.739659547806
114
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
115
+ Epoch: 33 | Training loss: 5.576781266886037 | Elapsed time: 4417.124375343323
116
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
117
+ Epoch: 34 | Training loss: 5.574231143383594 | Elapsed time: 4418.018548965454
118
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
119
+ Epoch: 35 | Training loss: 5.572677679947921 | Elapsed time: 4418.739028930664
120
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
121
+ Epoch: 36 | Training loss: 5.571132990887591 | Elapsed time: 4418.378818511963
122
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
123
+ Epoch: 37 | Training loss: 5.569969446675761 | Elapsed time: 4417.5980405807495
124
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
125
+ Epoch: 38 | Training loss: 5.567407997790631 | Elapsed time: 4418.226090431213
126
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_038_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
127
+ slurmstepd: error: *** JOB 25784080 ON ga005 CANCELLED AT 2022-10-12T12:43:01 DUE TO TIME LIMIT ***
128
+ slurmstepd: error: *** STEP 25784080.0 ON ga005 CANCELLED AT 2022-10-12T12:43:01 DUE TO TIME LIMIT ***
129
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
logs/imagenet_alef_1.out ADDED
@@ -0,0 +1,127 @@
1
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_alef', vocab_size=16384, block_size=255, batch_size=128, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_alef.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_alef', vocab_size=16384, block_size=255, batch_size=128, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_alef.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
+ Number of parameters: 110417664
+ Running on 2 GPUs total
+ => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_alef.pt'
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 5.566963162074437 | Elapsed time: 4441.384474277496
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.565525438783171 | Elapsed time: 4439.502795934677
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.56470417190384 | Elapsed time: 4439.483760356903
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.5624204964309065 | Elapsed time: 4459.736646413803
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.561623947151176 | Elapsed time: 4438.364464998245
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.559443450474239 | Elapsed time: 4439.056439161301
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.558322997955414 | Elapsed time: 4437.875585079193
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.557123668853577 | Elapsed time: 4438.148260831833
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.556930120127065 | Elapsed time: 4438.181549549103
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.554859112883424 | Elapsed time: 4439.295676231384
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.554743787077638 | Elapsed time: 4438.432215929031
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.552606200981331 | Elapsed time: 4440.81184220314
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.551569258702266 | Elapsed time: 4438.290123224258
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.550291191447865 | Elapsed time: 4443.546922922134
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.549496718434306 | Elapsed time: 4438.393090963364
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.549165438367175 | Elapsed time: 4438.040966272354
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.548175721640115 | Elapsed time: 4440.060662746429
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.546404487103016 | Elapsed time: 4438.736160993576
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.546351945078695 | Elapsed time: 4438.127324342728
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.546195989698321 | Elapsed time: 4439.530611753464
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.544273409429011 | Elapsed time: 4439.35408949852
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.5438105140175375 | Elapsed time: 4438.643720626831
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.542943477535343 | Elapsed time: 4531.208423137665
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.542639147866142 | Elapsed time: 4437.5531125068665
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.5411363609306346 | Elapsed time: 4437.793027877808
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.54048266406064 | Elapsed time: 4440.606894731522
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.539817189646291 | Elapsed time: 4438.403124570847
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.54004734831971 | Elapsed time: 4437.80110001564
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.538664855918922 | Elapsed time: 4437.545838356018
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.5386441840515745 | Elapsed time: 4439.835049390793
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 30 | Training loss: 5.536924628373031 | Elapsed time: 4439.145692586899
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 31 | Training loss: 5.535808313333548 | Elapsed time: 4444.751228809357
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 32 | Training loss: 5.535345764331646 | Elapsed time: 4438.6881539821625
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 33 | Training loss: 5.53584591439673 | Elapsed time: 4438.868976354599
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 34 | Training loss: 5.5344146106388425 | Elapsed time: 4437.364977836609
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 35 | Training loss: 5.533990528533509 | Elapsed time: 4438.160496711731
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 36 | Training loss: 5.53347582459807 | Elapsed time: 4438.219930171967
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 37 | Training loss: 5.533296345949887 | Elapsed time: 4438.192430973053
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+ slurmstepd: error: *** JOB 25999681 ON ga003 CANCELLED AT 2022-10-20T09:50:09 ***
+ slurmstepd: error: *** STEP 25999681.0 ON ga003 CANCELLED AT 2022-10-20T09:50:09 ***
logs/imagenet_alef_2.out ADDED
@@ -0,0 +1,127 @@
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_alef', vocab_size=16384, block_size=255, batch_size=128, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_alef.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_alef', vocab_size=16384, block_size=255, batch_size=128, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_alef.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
+ Number of parameters: 110417664
+ Running on 2 GPUs total
+ => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_alef.pt'
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 5.5326811582773 | Elapsed time: 4552.455201625824
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.532087442948744 | Elapsed time: 4541.033269882202
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.532145879842661 | Elapsed time: 4527.634945392609
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.530630941895934 | Elapsed time: 4543.700802564621
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.530634129130757 | Elapsed time: 4532.1716775894165
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.529234550525616 | Elapsed time: 4535.384343624115
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.528841091845776 | Elapsed time: 4544.032119035721
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.528287436745384 | Elapsed time: 4536.490844488144
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.528800182361584 | Elapsed time: 4534.5896253585815
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.527357240251966 | Elapsed time: 4534.611388683319
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.527853631353998 | Elapsed time: 4537.2827780246735
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.5262433110179 | Elapsed time: 4538.416050672531
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.525816423338014 | Elapsed time: 4545.015641212463
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.525116331117613 | Elapsed time: 4534.264271497726
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.5248599632636655 | Elapsed time: 4536.402310371399
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.525006600645753 | Elapsed time: 4533.143029689789
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.524488739009861 | Elapsed time: 4534.865980148315
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.523257099784218 | Elapsed time: 4535.609833717346
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.523583016219315 | Elapsed time: 4535.112987756729
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.523885637277609 | Elapsed time: 4534.515798091888
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.5223699675454245 | Elapsed time: 4533.731646776199
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.52230408094027 | Elapsed time: 4535.808632612228
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.5219054831848755 | Elapsed time: 4538.373485803604
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.52192791739663 | Elapsed time: 4538.351050376892
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.520790481472111 | Elapsed time: 4538.347093343735
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.520515246681876 | Elapsed time: 4534.917105674744
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.520187362876686 | Elapsed time: 4535.308793544769
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.520720029139257 | Elapsed time: 4536.7174389362335
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.519694088126991 | Elapsed time: 4536.545913219452
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.519993619604425 | Elapsed time: 4534.929441213608
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 30 | Training loss: 5.518580463763836 | Elapsed time: 4535.2925271987915
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 31 | Training loss: 5.517713272178566 | Elapsed time: 4535.789410352707
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 32 | Training loss: 5.5175725548179235 | Elapsed time: 4536.752972602844
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 33 | Training loss: 5.518335195116468 | Elapsed time: 4535.80343747139
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 34 | Training loss: 5.517208845060426 | Elapsed time: 4536.006014823914
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 35 | Training loss: 5.517036294317864 | Elapsed time: 4532.4260675907135
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 36 | Training loss: 5.516782159643335 | Elapsed time: 4535.359925270081
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 37 | Training loss: 5.516859723566534 | Elapsed time: 4535.818285703659
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_alef_256b_0.0003lr_Adamo_0s.pt
+ slurmstepd: error: *** STEP 26102669.0 ON ga012 CANCELLED AT 2022-10-22T17:01:09 DUE TO TIME LIMIT ***
+ slurmstepd: error: *** JOB 26102669 ON ga012 CANCELLED AT 2022-10-22T17:01:09 DUE TO TIME LIMIT ***
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
logs/imagenet_bet_0.out ADDED
@@ -0,0 +1,137 @@
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
+ Number of parameters: 336126976
+ Running on 4 GPUs total
+ => no checkpoint loaded, will train from scratch
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 6.120809218933532 | Elapsed time: 4216.346182346344
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.819266463326407 | Elapsed time: 4214.421049594879
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.747833351798348 | Elapsed time: 4215.357320308685
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.703314850832913 | Elapsed time: 4214.853225708008
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.6749757683836854 | Elapsed time: 4217.256542921066
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.6489467209273885 | Elapsed time: 4213.987170219421
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.632372181136887 | Elapsed time: 4215.189080238342
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.6153448112480175 | Elapsed time: 4215.026100158691
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.6036051693972535 | Elapsed time: 4214.023932218552
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.591139983749771 | Elapsed time: 4213.995197534561
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.582824171315897 | Elapsed time: 4214.716639280319
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.5714759955277575 | Elapsed time: 4213.436714410782
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.56347759706038 | Elapsed time: 4214.8268122673035
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.555867176646595 | Elapsed time: 4215.11917757988
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.551593566059947 | Elapsed time: 4214.872128725052
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.5444927329902765 | Elapsed time: 4214.885483980179
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.537123093905149 | Elapsed time: 4214.602069854736
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.533478752406803 | Elapsed time: 4215.776180505753
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.528530513942539 | Elapsed time: 4215.509309768677
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.525342354407678 | Elapsed time: 4215.141629934311
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.519145687572011 | Elapsed time: 4214.713824033737
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.515950245909639 | Elapsed time: 4214.048691034317
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.511700089327939 | Elapsed time: 4214.244443893433
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.508350300193428 | Elapsed time: 4215.018330812454
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.5022892468935485 | Elapsed time: 4215.608549833298
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.500027142276059 | Elapsed time: 4214.96466422081
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.496040144738379 | Elapsed time: 4214.980867147446
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.49420889417132 | Elapsed time: 4213.624946117401
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.4905321313665585 | Elapsed time: 4214.898879766464
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.487669699723189 | Elapsed time: 4214.363673686981
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 30 | Training loss: 5.486008314938693 | Elapsed time: 4213.980500936508
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 31 | Training loss: 5.481856287919082 | Elapsed time: 4214.092894077301
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 32 | Training loss: 5.479644645248855 | Elapsed time: 4214.268122434616
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 33 | Training loss: 5.478202774284126 | Elapsed time: 4214.675089359283
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 34 | Training loss: 5.47588456131957 | Elapsed time: 4214.7124791145325
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 35 | Training loss: 5.472756884171889 | Elapsed time: 4215.282642841339
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 36 | Training loss: 5.469559281546395 | Elapsed time: 4216.246860980988
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 37 | Training loss: 5.468589157729477 | Elapsed time: 4215.361236572266
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 38 | Training loss: 5.466702774878625 | Elapsed time: 4215.317864179611
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_038_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 39 | Training loss: 5.46418444588706 | Elapsed time: 4216.935404777527
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_039_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ slurmstepd: error: *** JOB 25789531 ON ga008 CANCELLED AT 2022-10-12T18:45:27 DUE TO TIME LIMIT ***
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+ slurmstepd: error: *** STEP 25789531.0 ON ga008 CANCELLED AT 2022-10-12T18:45:27 DUE TO TIME LIMIT ***
logs/imagenet_bet_1.out ADDED
@@ -0,0 +1,137 @@
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_bet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_bet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_bet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_bet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
+ Number of parameters: 336126976
+ Running on 4 GPUs total
+ => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_bet.pt'
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 5.462450480580211 | Elapsed time: 4212.490542650223
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.459824828739528 | Elapsed time: 4210.629241704941
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.460091401385022 | Elapsed time: 4210.093405008316
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.456877846770234 | Elapsed time: 4209.620198249817
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.457421847942706 | Elapsed time: 4210.129886627197
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.452875267947232 | Elapsed time: 4211.293568372726
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.453069615816617 | Elapsed time: 4209.6937000751495
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.4497216934448 | Elapsed time: 4209.908575057983
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.449413906634747 | Elapsed time: 4210.370651245117
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.446556557189454 | Elapsed time: 4208.084479093552
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.4468175063957345 | Elapsed time: 4210.39133143425
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.442614160455785 | Elapsed time: 4208.621545553207
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.441316787536804 | Elapsed time: 4209.932279348373
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.4395277352957105 | Elapsed time: 4209.686124324799
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.440548940519472 | Elapsed time: 4213.4403860569
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.43819805654017 | Elapsed time: 4208.388911247253
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.434972073005272 | Elapsed time: 4208.486869335175
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.435351838027085 | Elapsed time: 4208.599833726883
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.433853230109581 | Elapsed time: 4208.173202514648
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.434288627498752 | Elapsed time: 4208.1564836502075
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.43105089776404 | Elapsed time: 4208.968448400497
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.4307719106798045 | Elapsed time: 4208.604787111282
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.429320645499063 | Elapsed time: 4211.22211098671
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.4284210943437365 | Elapsed time: 4211.260216712952
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.424778309783021 | Elapsed time: 4210.164441823959
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.424842639831635 | Elapsed time: 4210.703585147858
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.422915841482736 | Elapsed time: 4210.591207265854
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.423080742347253 | Elapsed time: 4209.935755491257
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.421325563836645 | Elapsed time: 4211.150771856308
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.42019076104407 | Elapsed time: 4210.838050365448
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 30 | Training loss: 5.420276668140819 | Elapsed time: 4209.014040470123
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 31 | Training loss: 5.417722504717725 | Elapsed time: 4209.161095619202
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 32 | Training loss: 5.4170814586567 | Elapsed time: 4208.864420175552
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 33 | Training loss: 5.417063832640291 | Elapsed time: 4208.859807729721
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 34 | Training loss: 5.416183816564905 | Elapsed time: 4210.375951290131
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 35 | Training loss: 5.414503083910261 | Elapsed time: 4210.523791074753
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 36 | Training loss: 5.412380295199948 | Elapsed time: 4209.003999710083
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 37 | Training loss: 5.412666624528426 | Elapsed time: 4210.427684307098
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 38 | Training loss: 5.412035094631778 | Elapsed time: 4210.188027858734
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_038_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 39 | Training loss: 5.410666262162673 | Elapsed time: 4210.234239578247
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_039_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ slurmstepd: error: *** JOB 26026405 ON ga005 CANCELLED AT 2022-10-22T09:00:43 DUE TO TIME LIMIT ***
+ slurmstepd: error: *** STEP 26026405.0 ON ga005 CANCELLED AT 2022-10-22T09:00:43 DUE TO TIME LIMIT ***
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
logs/imagenet_bet_2.out ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_bet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
2
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_bet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
3
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_bet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
4
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_bet', vocab_size=16384, block_size=255, batch_size=64, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_bet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
5
+ model:
6
+ base_learning_rate: 4.5e-06
7
+ params:
8
+ ddconfig:
9
+ attn_resolutions:
10
+ - 16
11
+ ch: 128
12
+ ch_mult:
13
+ - 1
14
+ - 1
15
+ - 2
16
+ - 2
17
+ - 4
18
+ double_z: false
19
+ dropout: 0.0
20
+ in_channels: 3
21
+ num_res_blocks: 2
22
+ out_ch: 3
23
+ resolution: 256
24
+ z_channels: 256
25
+ embed_dim: 256
26
+ lossconfig:
27
+ params:
28
+ codebook_weight: 1.0
29
+ disc_conditional: false
30
+ disc_in_channels: 3
31
+ disc_num_layers: 2
32
+ disc_start: 0
33
+ disc_weight: 0.75
34
+ target: vqloss.VQLPIPSWithDiscriminator
35
+ monitor: val/rec_loss
36
+ n_embed: 16384
37
+ target: vqmodel.VQModel
38
+
39
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
40
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
41
+ VQLPIPSWithDiscriminator running with hinge loss.
42
+ Loaded VQ encoder.
43
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
44
+ Number of parameters: 336126976
45
+ Running on 4 GPUs total
46
+ => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_bet.pt'
47
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
48
+ warnings.warn(warning.format(ret))
49
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
50
+ warnings.warn(warning.format(ret))
51
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
52
+ warnings.warn(warning.format(ret))
53
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
54
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 5.409949554477658 | Elapsed time: 4208.888996124268
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.408481276166308 | Elapsed time: 4205.581461429596
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.40977370036351 | Elapsed time: 4206.132335186005
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.407540570677339 | Elapsed time: 4206.588444232941
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.409135840679858 | Elapsed time: 4207.734171390533
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.405489936051192 | Elapsed time: 4207.939695358276
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.4066948621065825 | Elapsed time: 4207.579474925995
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.40413139869164 | Elapsed time: 4207.16299200058
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.404782142696323 | Elapsed time: 4207.486356258392
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.402767116706688 | Elapsed time: 4208.941865682602
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.403839372659658 | Elapsed time: 4208.850451469421
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.400299143433928 | Elapsed time: 4209.01265501976
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.399841628612934 | Elapsed time: 4208.988021850586
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.398706332691662 | Elapsed time: 4208.25732922554
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.400468912229433 | Elapsed time: 4207.357954740524
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.398756319802481 | Elapsed time: 4207.593279123306
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.396284263736599 | Elapsed time: 4207.278984546661
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.397241822108403 | Elapsed time: 4206.966629981995
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.396315209658353 | Elapsed time: 4207.094468593597
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.397342272405024 | Elapsed time: 4207.316946268082
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.3946671945112685 | Elapsed time: 4209.307429075241
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.394954957209386 | Elapsed time: 4209.498737573624
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.394157246085671 | Elapsed time: 4209.038470983505
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.393681000853395 | Elapsed time: 4208.570669412613
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.39060800173185 | Elapsed time: 4208.7386746406555
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.391106247401738 | Elapsed time: 4208.158373117447
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.389693509329568 | Elapsed time: 4207.576681852341
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.39031723138693 | Elapsed time: 4210.473081111908
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.3889854459733995 | Elapsed time: 4208.715919494629
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.388302274564882 | Elapsed time: 4208.99591255188
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 30 | Training loss: 5.388859150531171 | Elapsed time: 4208.274594783783
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 31 | Training loss: 5.386700378574215 | Elapsed time: 4209.104762554169
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 32 | Training loss: 5.386515518668649 | Elapsed time: 4210.278479099274
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 33 | Training loss: 5.386882271133103 | Elapsed time: 4209.119398832321
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 34 | Training loss: 5.3862939987982905 | Elapsed time: 4209.7529039382935
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 35 | Training loss: 5.385118471730601 | Elapsed time: 4209.082966089249
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 36 | Training loss: 5.383273743392228 | Elapsed time: 4208.447032213211
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 37 | Training loss: 5.383973514545452 | Elapsed time: 4208.768651485443
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 38 | Training loss: 5.383728143671057 | Elapsed time: 4208.8059668540955
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_038_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 39 | Training loss: 5.382696704311924 | Elapsed time: 4207.565321683884
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_039_imagenet_GPT_bet_256b_0.0003lr_Adamo_0s.pt
+ slurmstepd: error: *** STEP 26144125.0 ON ga002 CANCELLED AT 2022-10-24T11:03:03 ***
+ slurmstepd: error: *** JOB 26144125 ON ga002 CANCELLED AT 2022-10-24T11:03:03 ***
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
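
The "Saving model to:" lines above encode the run configuration directly in the checkpoint filename: zero-padded epoch, save prefix, GPT config name, effective batch size, learning rate, optimizer, and seed. A minimal sketch of the per-epoch logging and naming pattern these lines imply (the function and argument names here are illustrative assumptions, not the repository's actual code):

import os
import time
import torch

def train_and_checkpoint(model, optimizer, loader, args, n_gpus):
    # Effective batch = per-GPU batch x number of GPUs; this is the number
    # that appears as e.g. '256b' in the filenames above (an assumption
    # consistent with the per-process batch sizes in these logs).
    eff_batch = args.batch_size * n_gpus
    for epoch in range(args.epochs):
        start, total_loss, n_iters = time.time(), 0.0, 0
        for x, y in loader:
            loss = model(x, y)  # assumed: forward pass returns the training loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            n_iters += 1
        print(f"Epoch: {epoch} | Training loss: {total_loss / n_iters} | Elapsed time: {time.time() - start}")
        ckpt_path = os.path.join(
            args.save_dir,
            f"model_{epoch:03d}_{args.save_prefix}_{args.gpt_config}_"
            f"{eff_batch}b_{args.lr}lr_{args.optimizer}o_{args.seed}s.pt",
        )
        print(f"Saving model to: {ckpt_path}")
        # Save optimizer state alongside the weights so an interrupted
        # job can be resumed, as the later logs in this commit show.
        torch.save({"model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict()}, ckpt_path)
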
logs/imagenet_dalet_0.out ADDED
@@ -0,0 +1,153 @@
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+ base_learning_rate: 4.5e-06
+ params:
+ ddconfig:
+ attn_resolutions:
+ - 16
+ ch: 128
+ ch_mult:
+ - 1
+ - 1
+ - 2
+ - 2
+ - 4
+ double_z: false
+ dropout: 0.0
+ in_channels: 3
+ num_res_blocks: 2
+ out_ch: 3
+ resolution: 256
+ z_channels: 256
+ embed_dim: 256
+ lossconfig:
+ params:
+ codebook_weight: 1.0
+ disc_conditional: false
+ disc_in_channels: 3
+ disc_num_layers: 2
+ disc_start: 0
+ disc_weight: 0.75
+ target: vqloss.VQLPIPSWithDiscriminator
+ monitor: val/rec_loss
+ n_embed: 16384
+ target: vqmodel.VQModel
+
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
+ Number of parameters: 1528398400
+ Running on 16 GPUs total
+ => no checkpoint loaded, will train from scratch
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 6.186269936861692 | Elapsed time: 5619.917689085007
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.811945501740996 | Elapsed time: 5613.203445196152
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.705892117183049 | Elapsed time: 5612.130469322205
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.647481455312266 | Elapsed time: 5611.836899757385
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.609939870062646 | Elapsed time: 5611.5135724544525
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.578640112271914 | Elapsed time: 5611.435474157333
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.558811575120741 | Elapsed time: 5611.554379701614
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.5343817103992805 | Elapsed time: 5611.822140216827
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.518903724487488 | Elapsed time: 5611.473790168762
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.501999848467725 | Elapsed time: 5611.3581511974335
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.492386792208646 | Elapsed time: 5610.871157884598
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.477873217404544 | Elapsed time: 5611.105211257935
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.4684995082470325 | Elapsed time: 5626.826799869537
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.457873392629099 | Elapsed time: 5617.610741138458
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.4496900581337 | Elapsed time: 5614.019058704376
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.44148720094374 | Elapsed time: 5613.779396772385
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.434726895057 | Elapsed time: 5712.453778743744
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.426652185304777 | Elapsed time: 5632.289559364319
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.419801063709087 | Elapsed time: 5717.344505786896
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.418267493576675 | Elapsed time: 5611.852866649628
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.404467808211838 | Elapsed time: 5611.138695001602
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.405431366633702 | Elapsed time: 5611.159034490585
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.397424073176427 | Elapsed time: 5611.598006248474
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.39218693265429 | Elapsed time: 5610.9683384895325
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.38683369300225 | Elapsed time: 5611.3924922943115
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.380850922168194 | Elapsed time: 5611.161632537842
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.376200350038298 | Elapsed time: 5611.715535879135
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.374127079795052 | Elapsed time: 5611.047680616379
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.366480315815319 | Elapsed time: 5611.688236951828
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.362076455324918 | Elapsed time: 5611.6972777843475
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ slurmstepd: error: *** JOB 25804226 ON ga001 CANCELLED AT 2022-10-14T19:34:18 DUE TO TIME LIMIT ***
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+ slurmstepd: error: *** STEP 25804226.0 ON ga001 CANCELLED AT 2022-10-14T19:34:18 DUE TO TIME LIMIT ***
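
This run was cancelled by the scheduler's time limit after epoch 29. The follow-up log below (imagenet_dalet_1.out) restarts the same job with resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt' and reports that both model weights and optimizer state were restored. A minimal sketch of that resume branch, assuming the checkpoint dict layout from the sketch above (the function and key names are assumptions):

import torch

def maybe_resume(model, optimizer, resume_path, device="cuda"):
    # Mirrors the two startup messages seen in these logs.
    if resume_path:
        ckpt = torch.load(resume_path, map_location=device)
        model.load_state_dict(ckpt["model_state_dict"])
        optimizer.load_state_dict(ckpt["optimizer_state_dict"])
        print(f"=> loaded model weights and optimizer state at checkpoint '{resume_path}'")
    else:
        print("=> no checkpoint loaded, will train from scratch")

Note that the epoch counter restarts at 0 after the resume, so the second run's checkpoint filenames (model_000_..., model_001_..., ...) overwrite those written by the first run.
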
logs/imagenet_dalet_1.out ADDED
@@ -0,0 +1,153 @@
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+ base_learning_rate: 4.5e-06
+ params:
+ ddconfig:
+ attn_resolutions:
+ - 16
+ ch: 128
+ ch_mult:
+ - 1
+ - 1
+ - 2
+ - 2
+ - 4
+ double_z: false
+ dropout: 0.0
+ in_channels: 3
+ num_res_blocks: 2
+ out_ch: 3
+ resolution: 256
+ z_channels: 256
+ embed_dim: 256
+ lossconfig:
+ params:
+ codebook_weight: 1.0
+ disc_conditional: false
+ disc_in_channels: 3
+ disc_num_layers: 2
+ disc_start: 0
+ disc_weight: 0.75
+ target: vqloss.VQLPIPSWithDiscriminator
+ monitor: val/rec_loss
+ n_embed: 16384
+ target: vqmodel.VQModel
+
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
+ Number of parameters: 1528398400
+ Running on 16 GPUs total
+ => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt'
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 5.36132043386911 | Elapsed time: 5611.475602149963
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.353246255544992 | Elapsed time: 5609.176076889038
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.352081888848609 | Elapsed time: 5609.765173435211
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.348592715401511 | Elapsed time: 5609.775065898895
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.345363831496263 | Elapsed time: 5608.818708658218
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.339491759766113 | Elapsed time: 5609.52224946022
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.33984013451682 | Elapsed time: 5609.668743610382
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.330916097471407 | Elapsed time: 5609.77694439888
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.328678440642761 | Elapsed time: 5609.147026062012
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.322817993259335 | Elapsed time: 5610.099377155304
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.322999638491696 | Elapsed time: 5609.647800445557
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.3166510385709564 | Elapsed time: 5609.4286053180695
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.314551228552789 | Elapsed time: 5609.531573057175
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.310544481096449 | Elapsed time: 5609.943645954132
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.307691298760139 | Elapsed time: 5610.385461807251
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.304726327406419 | Elapsed time: 5609.1782310009
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.3027097130393415 | Elapsed time: 5608.50914645195
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.2987160214891915 | Elapsed time: 5609.189682483673
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.295181529648178 | Elapsed time: 5609.2520797252655
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.2975093622426765 | Elapsed time: 5609.4860327243805
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.286686991120909 | Elapsed time: 5610.3333423137665
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.290533660294174 | Elapsed time: 5609.560243368149
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.285777743069918 | Elapsed time: 5609.129464626312
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.282636421996278 | Elapsed time: 5609.90997338295
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.279898757248611 | Elapsed time: 5609.894840478897
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.275894250307645 | Elapsed time: 5609.922780275345
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.273422160324874 | Elapsed time: 5608.922451257706
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.273311212560633 | Elapsed time: 5609.505341529846
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.267515561011407 | Elapsed time: 5609.848466873169
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.2649664964590155 | Elapsed time: 5608.95155954361
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+ slurmstepd: error: *** JOB 25928917 ON ga001 CANCELLED AT 2022-10-17T22:21:40 ***
+ slurmstepd: error: *** STEP 25928917.0 ON ga001 CANCELLED AT 2022-10-17T22:21:40 ***
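
The headers of these logs pin down the sequence-modeling bookkeeping: the VQGAN encodes each 256x256 image into a 16x16 grid of discrete codes (256 tokens over a 16384-entry codebook), block_size=255 is that sequence shifted by one for next-token prediction, and 16 GPUs at batch_size=16 give the effective batch of 256 that appears both in the checkpoint names ('256b') and in the iteration count. A quick check of that arithmetic, using only values quoted in the logs:

import math

latent_side = 16                                 # "Working with z of shape (1, 256, 16, 16)"
tokens_per_image = latent_side * latent_side     # 256 discrete codes per image
block_size = tokens_per_image - 1                # 255: predict each next token from its prefix
vocab_size = 16384                               # n_embed in the VQGAN config

n_gpus, per_gpu_batch = 16, 16                   # "Running on 16 GPUs total", batch_size=16
effective_batch = n_gpus * per_gpu_batch         # 256 -> the '256b' in the checkpoint names

n_images = 1281167                               # ImageNet train images, as reported above
iters_per_epoch = math.ceil(n_images / effective_batch)  # 5005, matching the log

assert (tokens_per_image, block_size, effective_batch, iters_per_epoch) == (256, 255, 256, 5005)
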
logs/imagenet_dalet_2.out ADDED
@@ -0,0 +1,153 @@
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
10
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
11
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
12
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
13
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
14
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
15
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
16
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_dalet', vocab_size=16384, block_size=255, batch_size=16, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
17
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
+ Number of parameters: 1528398400
+ Running on 16 GPUs total
+ => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_dalet.pt'
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 5.265875637805188 | Elapsed time: 5567.522572040558
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.259369312823712 | Elapsed time: 5564.492578029633
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.2597381899525955 | Elapsed time: 5564.743162870407
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.258067237366211 | Elapsed time: 5564.359503269196
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.256607461618734 | Elapsed time: 5564.1615924835205
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.25204824624838 | Elapsed time: 5564.931565761566
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.2539677929568604 | Elapsed time: 5563.973826169968
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.24619766086727 | Elapsed time: 5564.175024271011
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.245521523211743 | Elapsed time: 5565.000099182129
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.2409252663116 | Elapsed time: 5566.062009334564
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.242174459813715 | Elapsed time: 5564.810876607895
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.236654115080476 | Elapsed time: 5564.1297216415405
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.236161358706601 | Elapsed time: 5564.3255116939545
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.233254426294988 | Elapsed time: 5563.92977309227
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.23119745626078 | Elapsed time: 5564.57776761055
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.229216562570273 | Elapsed time: 5564.952026605606
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.22834527714031 | Elapsed time: 5564.028384447098
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.22506249367774 | Elapsed time: 5564.405800104141
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.222301427896444 | Elapsed time: 5564.323853731155
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.225507423141739 | Elapsed time: 5564.729813575745
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.21564470618874 | Elapsed time: 5564.293010473251
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.22011399159541 | Elapsed time: 5564.039561748505
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.216228381641857 | Elapsed time: 5563.3787343502045
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.213790066401799 | Elapsed time: 5564.072102308273
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.211814184789057 | Elapsed time: 5564.09782910347
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.208216408225564 | Elapsed time: 5564.173808813095
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.206609721950718 | Elapsed time: 5564.133508682251
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.2072193223875125 | Elapsed time: 5564.5976548194885
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.202074414842969 | Elapsed time: 5564.557286977768
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.200261769880663 | Elapsed time: 5564.025668859482
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_dalet_256b_0.0003lr_Adamo_0s.pt
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+ slurmstepd: error: *** JOB 25995681 ON ga001 CANCELLED AT 2022-10-19T21:05:48 ***
+ slurmstepd: error: *** STEP 25995681.0 ON ga001 CANCELLED AT 2022-10-19T21:05:48 ***
logs/imagenet_gimel_0.out ADDED
@@ -0,0 +1,145 @@
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
+ Number of parameters: 750659840
+ Running on 8 GPUs total
+ => no checkpoint loaded, will train from scratch
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 6.135367824123813 | Elapsed time: 4453.789637804031
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.798131484299392 | Elapsed time: 4448.56702375412
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.7218508500319265 | Elapsed time: 4448.635702133179
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.675868084475949 | Elapsed time: 4448.570371866226
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.64415309231479 | Elapsed time: 4448.48592376709
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.617594873083459 | Elapsed time: 4448.740148067474
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.5994461619770615 | Elapsed time: 4449.281894683838
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.580984299856943 | Elapsed time: 4448.514730215073
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.568105853306545 | Elapsed time: 4448.5986959934235
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.553497776713643 | Elapsed time: 4449.114318370819
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.545027699884954 | Elapsed time: 4449.138834238052
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.531521415519905 | Elapsed time: 4449.098812580109
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.522672693498365 | Elapsed time: 4448.901001691818
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.515013064823665 | Elapsed time: 4448.462759017944
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.508660832556573 | Elapsed time: 4448.8206622600555
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.500996496865561 | Elapsed time: 4448.373802423477
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.494677847463053 | Elapsed time: 4449.025486946106
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.488317275642753 | Elapsed time: 4448.813071966171
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.482922156159575 | Elapsed time: 4448.179989337921
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.480782058879688 | Elapsed time: 4448.937339067459
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.471766509876384 | Elapsed time: 4448.567655324936
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.468871520973228 | Elapsed time: 4448.808972358704
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.463682885698743 | Elapsed time: 4447.909594774246
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.459242056466483 | Elapsed time: 4447.975906133652
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.454185632654242 | Elapsed time: 4447.988601446152
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.451898510329849 | Elapsed time: 4448.234513998032
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.446100732496569 | Elapsed time: 4448.3813943862915
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.443605179839082 | Elapsed time: 4448.738905668259
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.440151975633619 | Elapsed time: 4448.119179487228
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.435839955123154 | Elapsed time: 4447.811242103577
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 30 | Training loss: 5.43510546612811 | Elapsed time: 4447.706588983536
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 31 | Training loss: 5.428354823505962 | Elapsed time: 4448.152802705765
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 32 | Training loss: 5.4250246925430226 | Elapsed time: 4448.008017539978
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 33 | Training loss: 5.425318639833372 | Elapsed time: 4448.51774430275
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 34 | Training loss: 5.419239971187565 | Elapsed time: 4447.93063378334
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 35 | Training loss: 5.418465005815565 | Elapsed time: 4447.893654823303
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 36 | Training loss: 5.416968753978566 | Elapsed time: 4447.303329944611
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 37 | Training loss: 5.414549265088854 | Elapsed time: 4447.923640966415
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ slurmstepd: error: *** JOB 25434829 ON ga007 CANCELLED AT 2022-09-27T05:16:34 DUE TO TIME LIMIT ***
+ slurmstepd: error: *** STEP 25434829.0 ON ga007 CANCELLED AT 2022-09-27T05:16:34 DUE TO TIME LIMIT ***
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
logs/imagenet_gimel_1.out ADDED
@@ -0,0 +1,145 @@
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
+ Number of parameters: 750659840
+ Running on 8 GPUs total
+ => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt'
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 5.4122182536434815 | Elapsed time: 4451.620691537857
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.407565878297423 | Elapsed time: 4448.908405780792
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.4076390378839605 | Elapsed time: 4449.1268055438995
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.405201209960999 | Elapsed time: 4448.375809907913
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.402879751931418 | Elapsed time: 4448.051635503769
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.398940936382953 | Elapsed time: 4447.108489990234
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.3984892385942 | Elapsed time: 4448.6805555820465
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.394524213484117 | Elapsed time: 4447.839882135391
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.393681722349458 | Elapsed time: 4447.997987985611
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.389481829834747 | Elapsed time: 4446.9174818992615
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.3900642942834445 | Elapsed time: 4450.051897764206
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.38399512570102 | Elapsed time: 4446.795197725296
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.382598996805502 | Elapsed time: 4447.133504629135
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.380970008556659 | Elapsed time: 4447.336958885193
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.380182438296871 | Elapsed time: 4448.077193975449
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.37776358468192 | Elapsed time: 4447.623661279678
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.3761095024131755 | Elapsed time: 4446.6331260204315
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.373553631331895 | Elapsed time: 4448.016380786896
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.372018059626683 | Elapsed time: 4447.691753149033
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.373565301409253 | Elapsed time: 4449.665137767792
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.368000029945946 | Elapsed time: 4447.717018127441
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.368069200487166 | Elapsed time: 4448.9792404174805
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.365969175439734 | Elapsed time: 4448.452637910843
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.36370946005746 | Elapsed time: 4449.199255228043
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.361778489740698 | Elapsed time: 4448.894506216049
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.36139729773248 | Elapsed time: 4449.400998592377
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.357821354308686 | Elapsed time: 4448.22802734375
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.3575045292194075 | Elapsed time: 4448.227440834045
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.356028392002894 | Elapsed time: 4449.236404657364
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.3537167115644975 | Elapsed time: 4448.098119497299
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 30 | Training loss: 5.354773959580001 | Elapsed time: 4448.19157743454
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 31 | Training loss: 5.349621527726119 | Elapsed time: 4448.848588943481
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 32 | Training loss: 5.347856791226657 | Elapsed time: 4449.08669257164
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 33 | Training loss: 5.349459922420872 | Elapsed time: 4449.880000591278
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 34 | Training loss: 5.3449111444966775 | Elapsed time: 4448.975071430206
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 35 | Training loss: 5.345478597578111 | Elapsed time: 4449.066173315048
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 36 | Training loss: 5.345441694097681 | Elapsed time: 4449.324020385742
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 37 | Training loss: 5.34429892197951 | Elapsed time: 4449.110852479935
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ slurmstepd: error: *** JOB 25481932 ON ga007 CANCELLED AT 2022-09-29T05:17:09 DUE TO TIME LIMIT ***
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+ slurmstepd: error: *** STEP 25481932.0 ON ga007 CANCELLED AT 2022-09-29T05:17:09 DUE TO TIME LIMIT ***
logs/imagenet_gimel_2.out ADDED
@@ -0,0 +1,145 @@
+ Namespace(data_path='/scratch/work/public/imagenet/train', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_gimel.pt', save_prefix='imagenet', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+ 
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1281167 images, and takes 5005 training iterations per epoch.
+ Number of parameters: 750659840
+ Running on 8 GPUs total
+ => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/imagenet_gimel.pt'
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 5.34334692397675 | Elapsed time: 4449.657378435135
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.339776907052908 | Elapsed time: 4447.917886018753
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.341135098479249 | Elapsed time: 4444.09060049057
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.339914214813507 | Elapsed time: 4442.944844245911
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.338722199564809 | Elapsed time: 4443.83095407486
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 5.335813725935472 | Elapsed time: 4443.666944980621
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 5.336396114166443 | Elapsed time: 4443.466259479523
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 5.333464476921699 | Elapsed time: 4442.281717777252
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 5.333533509889921 | Elapsed time: 4443.259808301926
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 5.330415923778827 | Elapsed time: 4442.889262676239
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 5.331861632687229 | Elapsed time: 4443.495901584625
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 5.326579586394898 | Elapsed time: 4445.117045164108
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 5.326098694929948 | Elapsed time: 4444.019357442856
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 5.325309695111407 | Elapsed time: 4447.032785177231
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 5.325285927661054 | Elapsed time: 4442.325577259064
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 5.323727816182536 | Elapsed time: 4445.216247320175
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 5.322813287386289 | Elapsed time: 4442.510272264481
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 5.320948296183949 | Elapsed time: 4443.243757009506
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 5.31982838042847 | Elapsed time: 4444.346598625183
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 5.32228920390675 | Elapsed time: 4443.063770294189
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 5.317358242858064 | Elapsed time: 4443.388057470322
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 5.3179008985971 | Elapsed time: 4443.252651691437
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 5.31656089710308 | Elapsed time: 4444.633692741394
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 5.314716050436685 | Elapsed time: 4442.504682302475
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 5.313526153564453 | Elapsed time: 4443.70303940773
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 5.313580217466249 | Elapsed time: 4448.647860527039
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 5.310432777538166 | Elapsed time: 4448.255652666092
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 5.310751127458357 | Elapsed time: 4447.5457644462585
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 28 | Training loss: 5.309859373281291 | Elapsed time: 4448.77831530571
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_028_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 29 | Training loss: 5.307888440771417 | Elapsed time: 4449.294291257858
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_029_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 30 | Training loss: 5.309594836601844 | Elapsed time: 4448.635311365128
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_030_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 31 | Training loss: 5.3048422309425804 | Elapsed time: 4449.126455307007
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_031_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 32 | Training loss: 5.303619781097808 | Elapsed time: 4449.342467546463
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_032_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 33 | Training loss: 5.305473794946661 | Elapsed time: 4448.250262260437
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_033_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 34 | Training loss: 5.3013285041450855 | Elapsed time: 4448.139315605164
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_034_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 35 | Training loss: 5.302365521212796 | Elapsed time: 4447.442922592163
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_035_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 36 | Training loss: 5.302704889695723 | Elapsed time: 4447.713094472885
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_036_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 37 | Training loss: 5.302075632611712 | Elapsed time: 4447.7592005729675
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_037_imagenet_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ slurmstepd: error: *** JOB 25583565 ON ga005 CANCELLED AT 2022-10-04T04:31:40 DUE TO TIME LIMIT ***
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+ slurmstepd: error: *** STEP 25583565.0 ON ga005 CANCELLED AT 2022-10-04T04:31:40 DUE TO TIME LIMIT ***
logs/saycam_gimel_0.out ADDED
@@ -0,0 +1,125 @@
+ Namespace(data_path='/vast/eo41/SAY_1fps', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, resume='', save_prefix='saycam', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+ 
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1723909 images, and takes 6735 training iterations per epoch.
+ Number of parameters: 750659840
+ Running on 8 GPUs total
+ => no checkpoint loaded, will train from scratch
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 5.768716645541683 | Elapsed time: 5992.774663448334
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 5.315443583320668 | Elapsed time: 5988.270876407623
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 5.17567369805147 | Elapsed time: 5987.70140004158
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 5.086252438730015 | Elapsed time: 5994.479656457901
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 5.023263860757739 | Elapsed time: 5996.0818021297455
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 4.971752702207501 | Elapsed time: 5996.032424688339
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 4.92765151472028 | Elapsed time: 5996.8689661026
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 4.891846964991172 | Elapsed time: 5997.144876241684
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 4.861733108851851 | Elapsed time: 5998.581600427628
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 4.8356013135902955 | Elapsed time: 5996.713852643967
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 4.80712858368577 | Elapsed time: 5996.502978086472
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 4.78290842852065 | Elapsed time: 5997.021359443665
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 4.76151581299066 | Elapsed time: 5996.186003684998
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 4.742412403307761 | Elapsed time: 5996.005835771561
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 4.723503864758441 | Elapsed time: 5997.976491212845
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 4.708198919629024 | Elapsed time: 5995.497739315033
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 4.692782934453635 | Elapsed time: 5997.512975931168
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 4.67519493322507 | Elapsed time: 5998.546558856964
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 4.66058209708822 | Elapsed time: 5997.879783630371
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 4.650417935556186 | Elapsed time: 5997.12960934639
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 4.636197118681276 | Elapsed time: 5997.350676774979
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 4.624867035462932 | Elapsed time: 5997.938010692596
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 4.61198255759128 | Elapsed time: 5996.415572166443
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 4.598085585291861 | Elapsed time: 5996.963915109634
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 4.5896146018857324 | Elapsed time: 5996.694915056229
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 4.579809171496804 | Elapsed time: 5996.802541732788
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 4.567457862562309 | Elapsed time: 5998.091451406479
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 4.563411238853721 | Elapsed time: 5996.794277667999
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+ slurmstepd: error: *** JOB 25591758 ON ga001 CANCELLED AT 2022-10-05T05:28:35 ***
+ slurmstepd: error: *** STEP 25591758.0 ON ga001 CANCELLED AT 2022-10-05T05:28:35 ***
logs/saycam_gimel_1.out ADDED
@@ -0,0 +1,125 @@
+ Namespace(data_path='/vast/eo41/SAY_1fps', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/saycam_gimel.pt', save_prefix='saycam', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+ 
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1723909 images, and takes 6735 training iterations per epoch.
+ Number of parameters: 750659840
+ Running on 8 GPUs total
+ => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/saycam_gimel.pt'
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 4.549386310081793 | Elapsed time: 5992.660080194473
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 4.543002819006055 | Elapsed time: 5987.250670909882
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 4.531977684407563 | Elapsed time: 5987.944575548172
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 4.5234166407992245 | Elapsed time: 5988.913918018341
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 4.517454680214127 | Elapsed time: 5988.988751173019
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 4.509043409847381 | Elapsed time: 5989.363956451416
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 4.499003903814485 | Elapsed time: 5988.494083166122
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 4.493543969744121 | Elapsed time: 5988.802041530609
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 4.486805682511531 | Elapsed time: 5988.879884958267
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 4.482480470994356 | Elapsed time: 5988.876097202301
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 4.472419641065704 | Elapsed time: 5988.735966682434
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 4.464847651037713 | Elapsed time: 5989.111615657806
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 4.458149412851822 | Elapsed time: 5988.41983127594
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 4.451915077961368 | Elapsed time: 5989.46940279007
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 4.446143237236436 | Elapsed time: 5989.308108329773
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 4.441227948338169 | Elapsed time: 5989.474314212799
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 4.436091971804499 | Elapsed time: 5988.781164884567
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 4.428404669666078 | Elapsed time: 5989.283316850662
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 4.421668468304006 | Elapsed time: 5988.60914516449
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 4.420119190251464 | Elapsed time: 5988.772259473801
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 4.413603621513293 | Elapsed time: 5988.103999853134
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 4.409056707338306 | Elapsed time: 5988.375903606415
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 4.402734815516823 | Elapsed time: 5988.697876214981
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 4.3945760062825885 | Elapsed time: 5988.430021524429
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 4.392142767470769 | Elapsed time: 5989.888501405716
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 4.388029863963591 | Elapsed time: 5988.6781396865845
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 4.381039526450165 | Elapsed time: 5988.945887088776
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 4.38213356282858 | Elapsed time: 5988.676961660385
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ slurmstepd: error: *** JOB 25671941 ON ga001 CANCELLED AT 2022-10-07T05:29:06 DUE TO TIME LIMIT ***
+ slurmstepd: error: *** STEP 25671941.0 ON ga001 CANCELLED AT 2022-10-07T05:29:06 DUE TO TIME LIMIT ***
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
logs/saycam_gimel_2.out ADDED
@@ -0,0 +1,125 @@
+ Namespace(data_path='/vast/eo41/SAY_1fps', vqconfig_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.yaml', vqmodel_path='/scratch/eo41/visual-recognition-memory/vqgan_pretrained_models/imagenet_16x16_16384.ckpt', num_workers=8, seed=0, save_dir='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models', gpt_config='GPT_gimel', vocab_size=16384, block_size=255, batch_size=32, lr=0.0003, optimizer='Adam', epochs=1000, resume='/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/saycam_gimel.pt', save_prefix='saycam', gpu=None, world_size=-1, rank=-1, dist_url='env://', dist_backend='nccl', local_rank=-1)
+ model:
+   base_learning_rate: 4.5e-06
+   params:
+     ddconfig:
+       attn_resolutions:
+       - 16
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       double_z: false
+       dropout: 0.0
+       in_channels: 3
+       num_res_blocks: 2
+       out_ch: 3
+       resolution: 256
+       z_channels: 256
+     embed_dim: 256
+     lossconfig:
+       params:
+         codebook_weight: 1.0
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_num_layers: 2
+         disc_start: 0
+         disc_weight: 0.75
+       target: vqloss.VQLPIPSWithDiscriminator
+     monitor: val/rec_loss
+     n_embed: 16384
+   target: vqmodel.VQModel
+ 
+ Working with z of shape (1, 256, 16, 16) = 65536 dimensions.
+ loaded pretrained LPIPS loss from taming/modules/autoencoder/lpips/vgg.pth
+ VQLPIPSWithDiscriminator running with hinge loss.
+ Loaded VQ encoder.
+ Data loaded: dataset contains 1723909 images, and takes 6735 training iterations per epoch.
+ Number of parameters: 750659840
+ Running on 8 GPUs total
+ => loaded model weights and optimizer state at checkpoint '/scratch/eo41/visual-recognition-memory/gpt_pretrained_models/saycam_gimel.pt'
+ /scratch/eo41/miniconda3/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.
+ warnings.warn(warning.format(ret))
+ Epoch: 0 | Training loss: 4.376433072624688 | Elapsed time: 5962.150438547134
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_000_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 1 | Training loss: 4.374933369883627 | Elapsed time: 5955.103978395462
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_001_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 2 | Training loss: 4.368323994992659 | Elapsed time: 5958.477677106857
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_002_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 3 | Training loss: 4.36417738734658 | Elapsed time: 5961.600141525269
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_003_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 4 | Training loss: 4.362494723214516 | Elapsed time: 5958.8795874118805
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_004_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 5 | Training loss: 4.357988732464683 | Elapsed time: 5959.776846408844
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_005_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 6 | Training loss: 4.351521733538168 | Elapsed time: 5956.789329528809
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_006_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 7 | Training loss: 4.3501106043789415 | Elapsed time: 5957.811106204987
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_007_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 8 | Training loss: 4.346378896921409 | Elapsed time: 5957.175212621689
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_008_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 9 | Training loss: 4.345732934473175 | Elapsed time: 5959.227693319321
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_009_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 10 | Training loss: 4.338641034751444 | Elapsed time: 5959.748023033142
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_010_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 11 | Training loss: 4.334417951416775 | Elapsed time: 5956.702304601669
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_011_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 12 | Training loss: 4.330386307864341 | Elapsed time: 5964.783320188522
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_012_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 13 | Training loss: 4.327019438853331 | Elapsed time: 5961.690758228302
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_013_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 14 | Training loss: 4.3241529451977705 | Elapsed time: 5960.315366983414
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_014_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 15 | Training loss: 4.3214996692244 | Elapsed time: 5957.221389055252
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_015_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 16 | Training loss: 4.319029978990732 | Elapsed time: 5959.791395664215
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_016_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 17 | Training loss: 4.314226237844341 | Elapsed time: 5962.10276389122
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_017_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 18 | Training loss: 4.309224044896093 | Elapsed time: 5959.8184270858765
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_018_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 19 | Training loss: 4.310250364344653 | Elapsed time: 5961.591814517975
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_019_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 20 | Training loss: 4.30579106671419 | Elapsed time: 5953.841495990753
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_020_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 21 | Training loss: 4.30334588261826 | Elapsed time: 5958.72381901741
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_021_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 22 | Training loss: 4.298879969482875 | Elapsed time: 5957.328413248062
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_022_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 23 | Training loss: 4.292635204036234 | Elapsed time: 5958.7999658584595
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_023_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 24 | Training loss: 4.292109297183857 | Elapsed time: 5960.226742982864
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_024_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 25 | Training loss: 4.2897804375303705 | Elapsed time: 5960.850795030594
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_025_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 26 | Training loss: 4.284714426141186 | Elapsed time: 5960.225991010666
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_026_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ Epoch: 27 | Training loss: 4.303472635068093 | Elapsed time: 5959.183020114899
+ Saving model to: /scratch/eo41/visual-recognition-memory/gpt_pretrained_models/model_027_saycam_GPT_gimel_256b_0.0003lr_Adamo_0s.pt
+ srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
+ slurmstepd: error: *** JOB 25711482 ON ga001 CANCELLED AT 2022-10-09T20:19:15 ***
+ slurmstepd: error: *** STEP 25711482.0 ON ga001 CANCELLED AT 2022-10-09T20:19:15 ***
vqgan_pretrained_models/imagenet_16x16_16384.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:845a68805098cb666420d5db93df53f3a3b6dd443e6dd85c05759c5b998cd663
+ size 980092370
vqgan_pretrained_models/imagenet_16x16_16384.yaml ADDED
@@ -0,0 +1,34 @@
+ model:
+   base_learning_rate: 4.5e-06
+   target: vqmodel.VQModel
+   params:
+     embed_dim: 256
+     n_embed: 16384
+     monitor: val/rec_loss
+     ddconfig:
+       double_z: false
+       z_channels: 256
+       resolution: 256
+       in_channels: 3
+       out_ch: 3
+       ch: 128
+       ch_mult:
+       - 1
+       - 1
+       - 2
+       - 2
+       - 4
+       num_res_blocks: 2
+       attn_resolutions:
+       - 16
+       dropout: 0.0
+     lossconfig:
+       target: vqloss.VQLPIPSWithDiscriminator
+       params:
+         disc_conditional: false
+         disc_in_channels: 3
+         disc_start: 0
+         disc_weight: 0.75
+         disc_num_layers: 2
+         codebook_weight: 1.0
+ 
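For reference, configs in this taming-transformers-style layout are typically consumed by resolving the dotted "target" path into a class and passing "params" as its constructor kwargs. The sketch below illustrates that convention only; it is not code shipped in this upload, and it assumes vqmodel.py and vqloss.py (named by the "target" strings above) are importable from the working directory.

import importlib
from omegaconf import OmegaConf

def instantiate_from_config(config):
    # "target" is a dotted path such as "vqmodel.VQModel"; "params" are its kwargs.
    module_name, class_name = config["target"].rsplit(".", 1)
    cls = getattr(importlib.import_module(module_name), class_name)
    return cls(**config.get("params", dict()))

cfg = OmegaConf.load("vqgan_pretrained_models/imagenet_16x16_16384.yaml")
model = instantiate_from_config(cfg.model)  # builds the VQModel described by the yaml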