diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..e7ea03ce232f3f0e0c669dcb1357886459023492 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +541358.err filter=lfs diff=lfs merge=lfs -text +541360.err filter=lfs diff=lfs merge=lfs -text +541285.err filter=lfs diff=lfs merge=lfs -text diff --git a/541270.err b/541270.err new file mode 100644 index 0000000000000000000000000000000000000000..e2430f9449d8974f7230c72ee0818899ce8437f6 --- /dev/null +++ b/541270.err @@ -0,0 +1,7 @@ +[NbConvertApp] Converting notebook HCP_downstream_raw_flatmaps.ipynb to python +[NbConvertApp] Writing 36448 bytes to HCP_downstream_raw_flatmaps.py +Traceback (most recent call last): + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/HCP_downstream_raw_flatmaps.py", line 87, in + utils.seed_everything(seed) + ^^^^ +NameError: name 'seed' is not defined diff --git a/541270.out b/541270.out new file mode 100644 index 0000000000000000000000000000000000000000..434c093fb0429e3bcac20a8e432a7ffe4e42b97f --- /dev/null +++ b/541270.out @@ -0,0 +1,5 @@ +NUM_GPUS=1 +MASTER_ADDR=ip-10-0-133-32 +MASTER_PORT=11200 +WORLD_SIZE=1 +PID of this process = 1825497 diff --git a/541272.err b/541272.err new file mode 100644 index 0000000000000000000000000000000000000000..d63f00df4935d6c2dc140e5ba3fdfdaaba7bce38 --- /dev/null +++ b/541272.err @@ -0,0 +1,7 @@ +[NbConvertApp] Converting notebook HCP_downstream_raw_flatmaps.ipynb to python +[NbConvertApp] Writing 36446 bytes to HCP_downstream_raw_flatmaps.py +Traceback (most recent call last): + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/HCP_downstream_raw_flatmaps.py", line 737, in + optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay) + ^^^^^^^^^^^^^ +NameError: name 'learning_rate' is not defined diff --git a/541272.out b/541272.out new file mode 100644 index 0000000000000000000000000000000000000000..d9972af5d3d3480c1e1926a39debd46b523f6451 --- /dev/null +++ b/541272.out @@ -0,0 +1,8 @@ +NUM_GPUS=1 +MASTER_ADDR=ip-10-0-133-32 +MASTER_PORT=12606 +WORLD_SIZE=1 +PID of this process = 1826825 +------ ARGS ------- + Namespace(model_suffix='beta', hcp_flat_path='/weka/proj-medarc/shared/HCP-Flat', batch_size=128, wandb_log=True, num_epochs=20, lr_scheduler_type='cycle', save_ckpt=False, seed=42, max_lr=0.1, target='sex', num_workers=15, weight_decay=1e-05) +Input dimension: 737280 diff --git a/541275.err b/541275.err new file mode 100644 index 0000000000000000000000000000000000000000..74228dc04c7cd2c747f6e7aa6b7cf8de7e9dad16 --- /dev/null +++ b/541275.err @@ -0,0 +1,7 @@ +[NbConvertApp] Converting notebook HCP_downstream_raw_flatmaps.ipynb to python +[NbConvertApp] Writing 36336 bytes to HCP_downstream_raw_flatmaps.py +Traceback (most recent call last): + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/HCP_downstream_raw_flatmaps.py", line 727, in + num_iterations_per_epoch = math.ceil(flatmaps_train/batch_size) + ~~~~~~~~~~~~~~^~~~~~~~~~~ +TypeError: unsupported operand type(s) for /: 'Dataset' and 'int' diff --git a/541275.out b/541275.out new file mode 100644 index 0000000000000000000000000000000000000000..d5a43be48443a9165f2e7b6df92fbd89af741603 --- /dev/null +++ b/541275.out @@ -0,0 +1,8 @@ +NUM_GPUS=1 +MASTER_ADDR=ip-10-0-136-246 +MASTER_PORT=17060 +WORLD_SIZE=1 +PID of this process = 564369 +------ ARGS ------- + Namespace(model_suffix='beta', hcp_flat_path='/weka/proj-medarc/shared/HCP-Flat', batch_size=128, wandb_log=True, num_epochs=20, lr_scheduler_type='cycle', save_ckpt=False, seed=42, max_lr=0.1, target='sex', num_workers=15, weight_decay=1e-05) +Input dimension: 737280 diff --git a/541276.err b/541276.err new file mode 100644 index 0000000000000000000000000000000000000000..a84dacbbc98f7148d72654440da6357c781c60cb --- /dev/null +++ b/541276.err @@ -0,0 +1,50 @@ +[NbConvertApp] Converting notebook HCP_downstream_raw_flatmaps.ipynb to python +[NbConvertApp] Writing 36345 bytes to HCP_downstream_raw_flatmaps.py +wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information. +wandb: Currently logged in as: ckadirt. Use `wandb login --relogin` to force relogin +wandb: Tracking run with wandb version 0.18.3 +wandb: Run data is saved locally in /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/wandb/run-20241126_204427-HCPflat_raw_beta_sex_83810 +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run HCPflat_raw_beta_sex +wandb: ⭐️ View project at https://stability.wandb.io/ckadirt/fMRI-foundation-model +wandb: 🚀 View run at https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_raw_beta_sex_83810 + Epoch 1/20 - Training: 0%| | 0/870 [00:00 + parser = argparse.ArgumentParser(description="Model Training Configuration") + ^^^^^^^^ +NameError: name 'argparse' is not defined diff --git a/541280.out b/541280.out new file mode 100644 index 0000000000000000000000000000000000000000..ab2a9b2b1405410d3b7f530232ed48e477dcdeff --- /dev/null +++ b/541280.out @@ -0,0 +1,4 @@ +NUM_GPUS=1 +MASTER_ADDR=ip-10-0-133-32 +MASTER_PORT=13737 +WORLD_SIZE=1 diff --git a/541281.err b/541281.err new file mode 100644 index 0000000000000000000000000000000000000000..1f16f760ee339b0cf309d3d7230810fa87016fbf --- /dev/null +++ b/541281.err @@ -0,0 +1,20 @@ +[NbConvertApp] Converting notebook HCP_downstream_finetune.ipynb to python +[NbConvertApp] Writing 31636 bytes to HCP_downstream_finetune.py +Traceback (most recent call last): + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/HCP_downstream_finetune.py", line 123, in + parser.add_argument( + File "/usr/lib/python3.11/argparse.py", line 1485, in add_argument + return self._add_action(action) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/usr/lib/python3.11/argparse.py", line 1867, in _add_action + self._optionals._add_action(action) + File "/usr/lib/python3.11/argparse.py", line 1687, in _add_action + action = super(_ArgumentGroup, self)._add_action(action) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/usr/lib/python3.11/argparse.py", line 1499, in _add_action + self._check_conflict(action) + File "/usr/lib/python3.11/argparse.py", line 1636, in _check_conflict + conflict_handler(action, confl_optionals) + File "/usr/lib/python3.11/argparse.py", line 1645, in _handle_conflict_error + raise ArgumentError(action, message % conflict_string) +argparse.ArgumentError: argument --wandb_log/--no-wandb_log: conflicting option strings: --wandb_log, --no-wandb_log diff --git a/541281.out b/541281.out new file mode 100644 index 0000000000000000000000000000000000000000..af86f824d25ae136dcea9783e247a40cc04136d4 --- /dev/null +++ b/541281.out @@ -0,0 +1,4 @@ +NUM_GPUS=1 +MASTER_ADDR=ip-10-0-133-32 +MASTER_PORT=13853 +WORLD_SIZE=1 diff --git a/541282.err b/541282.err new file mode 100644 index 0000000000000000000000000000000000000000..0e0154e3705d11a213541d6221454eb3f3de2902 --- /dev/null +++ b/541282.err @@ -0,0 +1,54 @@ +[NbConvertApp] Converting notebook HCP_downstream_finetune.ipynb to python +[NbConvertApp] Writing 31825 bytes to HCP_downstream_finetune.py +/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/HCP_downstream_finetune.py:658: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + state = torch.load(checkpoint_path) +wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information. +wandb: Currently logged in as: ckadirt. Use `wandb login --relogin` to force relogin +wandb: Tracking run with wandb version 0.18.3 +wandb: Run data is saved locally in /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/wandb/run-20241126_213704-HCPflat_large_gsrFalse__beta_sex_HCPFT_83810 +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run HCPflat_large_gsrFalse__beta_sex_HCPFT +wandb: ⭐️ View project at https://stability.wandb.io/ckadirt/fMRI-foundation-model +wandb: 🚀 View run at https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_large_gsrFalse__beta_sex_HCPFT_83810 + Epoch 1/20 - Training: 0%| | 0/3479 [00:00 + outputs = model(images, gsr=gsr) # Shape: [num_train_samples, num_classes] + ^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/HCP_downstream_finetune.py", line 696, in forward + x = self.mae_model(x, global_pool=global_pool, forward_features = True) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/mae_utils/flat_models.py", line 753, in forward + x = blk(x) + ^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/mae_utils/video_vit.py", line 166, in forward + x = x + self.drop_path(self.attn(self.norm1(x))) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/mae_utils/video_vit.py", line 114, in forward + attn = (q @ k.transpose(-2, -1)) * self.scale + ~~^~~~~~~~~~~~~~~~~~~~~ +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.71 GiB. GPU 0 has a total capacity of 79.11 GiB of which 770.94 MiB is free. Including non-PyTorch memory, this process has 78.35 GiB memory in use. Of the allocated memory 74.92 GiB is allocated by PyTorch, and 2.77 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/541282.out b/541282.out new file mode 100644 index 0000000000000000000000000000000000000000..d7c02b32b27b2ffb48bd40e747e4aea7e8056692 --- /dev/null +++ b/541282.out @@ -0,0 +1,70 @@ +NUM_GPUS=1 +MASTER_ADDR=ip-10-0-133-32 +MASTER_PORT=14835 +WORLD_SIZE=1 +------ ARGS ------- + Namespace(found_model_name='HCPflat_large_gsrFalse_', epoch_checkpoint='epoch99.pth', model_suffix='beta', hcp_flat_path='/weka/proj-medarc/shared/HCP-Flat', batch_size=32, wandb_log=True, num_epochs=20, lr_scheduler_type='cycle', save_ckpt=False, seed=42, max_lr=0.0003, target='sex', num_workers=15, weight_decay=0.001, global_pool=True) +outdir /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ +Loaded config.yaml from ckpt folder /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ + +__CONFIG__ +base_lr = 0.001 +batch_size = 32 +ckpt_interval = 5 +ckpt_saving = True +cls_embed = True +contrastive_loss_weight = 1.0 +datasets_to_include = HCP +decoder_embed_dim = 512 +grad_accumulation_steps = 1 +grad_clip = 1.0 +gsr = False +hcp_flat_path = /weka/proj-medarc/shared/HCP-Flat +mask_ratio = 0.75 +model_name = HCPflat_large_gsrFalse_ +no_qkv_bias = False +norm_pix_loss = False +nsd_flat_path = /weka/proj-medarc/shared/NSD-Flat +num_epochs = 100 +num_frames = 16 +num_samples_per_epoch = 200000 +num_workers = 10 +patch_size = 16 +pct_masks_to_decode = 1 +plotting = True +pred_t_dim = 8 +print_interval = 20 +probe_base_lr = 0.0003 +probe_batch_size = 8 +probe_num_epochs = 30 +probe_num_samples_per_epoch = 100000 +resume_from_ckpt = True +seed = 42 +sep_pos_embed = True +t_patch_size = 2 +test_num_samples_per_epoch = 50000 +test_set = False +trunc_init = False +use_contrastive_loss = False +wandb_log = True + + +WORLD_SIZE=1 +PID of this process = 1885623 +global_pool = True +gsr = False +Creating datasets +Datasets ready +img_size (144, 320) patch_size (16, 16) frames 16 t_patch_size 2 +model initialized +latest_checkpoint: epoch99.pth + +Loaded checkpoint epoch99.pth from /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ + +Input dimension: 1024 +total_steps 69580 +wandb_config: + {'model_name': 'HCPflat_large_gsrFalse__HCP_FT_sex', 'batch_size': 32, 'weight_decay': 0.001, 'num_epochs': 20, 'seed': 42, 'lr_scheduler_type': 'cycle', 'save_ckpt': False, 'max_lr': 0.0003, 'target': 'sex', 'num_workers': 15} +wandb_id: HCPflat_large_gsrFalse__beta_sex_HCPFT_83810 +wandb: 🚀 View run HCPflat_large_gsrFalse__beta_sex_HCPFT at: https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_large_gsrFalse__beta_sex_HCPFT_83810 +wandb: Find logs at: wandb/run-20241126_213704-HCPflat_large_gsrFalse__beta_sex_HCPFT_83810/logs diff --git a/541283.err b/541283.err new file mode 100644 index 0000000000000000000000000000000000000000..de418fd29a15e717bac1395b3ef1d71c99007cab --- /dev/null +++ b/541283.err @@ -0,0 +1,54 @@ +[NbConvertApp] Converting notebook HCP_downstream_finetune.ipynb to python +[NbConvertApp] Writing 31825 bytes to HCP_downstream_finetune.py +/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/HCP_downstream_finetune.py:658: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + state = torch.load(checkpoint_path) +wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information. +wandb: Currently logged in as: ckadirt. Use `wandb login --relogin` to force relogin +wandb: Tracking run with wandb version 0.18.3 +wandb: Run data is saved locally in /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/wandb/run-20241126_213826-HCPflat_large_gsrFalse__beta_sex_HCPFT_83810 +wandb: Run `wandb offline` to turn off syncing. +wandb: Resuming run HCPflat_large_gsrFalse__beta_sex_HCPFT +wandb: ⭐️ View project at https://stability.wandb.io/ckadirt/fMRI-foundation-model +wandb: 🚀 View run at https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_large_gsrFalse__beta_sex_HCPFT_83810 + Epoch 1/20 - Training: 0%| | 0/4638 [00:00 + outputs = model(images, gsr=gsr) # Shape: [num_train_samples, num_classes] + ^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/HCP_downstream_finetune.py", line 696, in forward + x = self.mae_model(x, global_pool=global_pool, forward_features = True) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/mae_utils/flat_models.py", line 753, in forward + x = blk(x) + ^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/mae_utils/video_vit.py", line 166, in forward + x = x + self.drop_path(self.attn(self.norm1(x))) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/mae_utils/video_vit.py", line 114, in forward + attn = (q @ k.transpose(-2, -1)) * self.scale + ~~^~~~~~~~~~~~~~~~~~~~~ +torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.04 GiB. GPU 0 has a total capacity of 79.11 GiB of which 1.06 GiB is free. Including non-PyTorch memory, this process has 78.04 GiB memory in use. Of the allocated memory 75.85 GiB is allocated by PyTorch, and 1.52 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables) diff --git a/541283.out b/541283.out new file mode 100644 index 0000000000000000000000000000000000000000..6a4d499e379f9b7b6f8ba9c64b44a91280e19265 --- /dev/null +++ b/541283.out @@ -0,0 +1,70 @@ +NUM_GPUS=1 +MASTER_ADDR=ip-10-0-133-32 +MASTER_PORT=17673 +WORLD_SIZE=1 +------ ARGS ------- + Namespace(found_model_name='HCPflat_large_gsrFalse_', epoch_checkpoint='epoch99.pth', model_suffix='beta', hcp_flat_path='/weka/proj-medarc/shared/HCP-Flat', batch_size=24, wandb_log=True, num_epochs=20, lr_scheduler_type='cycle', save_ckpt=False, seed=42, max_lr=0.0003, target='sex', num_workers=15, weight_decay=0.001, global_pool=True) +outdir /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ +Loaded config.yaml from ckpt folder /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ + +__CONFIG__ +base_lr = 0.001 +batch_size = 32 +ckpt_interval = 5 +ckpt_saving = True +cls_embed = True +contrastive_loss_weight = 1.0 +datasets_to_include = HCP +decoder_embed_dim = 512 +grad_accumulation_steps = 1 +grad_clip = 1.0 +gsr = False +hcp_flat_path = /weka/proj-medarc/shared/HCP-Flat +mask_ratio = 0.75 +model_name = HCPflat_large_gsrFalse_ +no_qkv_bias = False +norm_pix_loss = False +nsd_flat_path = /weka/proj-medarc/shared/NSD-Flat +num_epochs = 100 +num_frames = 16 +num_samples_per_epoch = 200000 +num_workers = 10 +patch_size = 16 +pct_masks_to_decode = 1 +plotting = True +pred_t_dim = 8 +print_interval = 20 +probe_base_lr = 0.0003 +probe_batch_size = 8 +probe_num_epochs = 30 +probe_num_samples_per_epoch = 100000 +resume_from_ckpt = True +seed = 42 +sep_pos_embed = True +t_patch_size = 2 +test_num_samples_per_epoch = 50000 +test_set = False +trunc_init = False +use_contrastive_loss = False +wandb_log = True + + +WORLD_SIZE=1 +PID of this process = 1888215 +global_pool = True +gsr = False +Creating datasets +Datasets ready +img_size (144, 320) patch_size (16, 16) frames 16 t_patch_size 2 +model initialized +latest_checkpoint: epoch99.pth + +Loaded checkpoint epoch99.pth from /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ + +Input dimension: 1024 +total_steps 92760 +wandb_config: + {'model_name': 'HCPflat_large_gsrFalse__HCP_FT_sex', 'batch_size': 24, 'weight_decay': 0.001, 'num_epochs': 20, 'seed': 42, 'lr_scheduler_type': 'cycle', 'save_ckpt': False, 'max_lr': 0.0003, 'target': 'sex', 'num_workers': 15} +wandb_id: HCPflat_large_gsrFalse__beta_sex_HCPFT_83810 +wandb: 🚀 View run HCPflat_large_gsrFalse__beta_sex_HCPFT at: https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_large_gsrFalse__beta_sex_HCPFT_83810 +wandb: Find logs at: wandb/run-20241126_213826-HCPflat_large_gsrFalse__beta_sex_HCPFT_83810/logs diff --git a/541284.err b/541284.err new file mode 100644 index 0000000000000000000000000000000000000000..4db0e3bcfea9a5374345d8715cbdeb1e0b8ae5f6 --- /dev/null +++ b/541284.err @@ -0,0 +1,16 @@ +[NbConvertApp] Converting notebook HCP_downstream_finetune.ipynb to python +[NbConvertApp] Writing 31825 bytes to HCP_downstream_finetune.py +/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/HCP_downstream_finetune.py:658: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + state = torch.load(checkpoint_path) +wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information. +wandb: Currently logged in as: ckadirt. Use `wandb login --relogin` to force relogin +wandb: Tracking run with wandb version 0.18.3 +wandb: Run data is saved locally in /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/wandb/run-20241126_214117-HCPflat_large_gsrFalse__beta_sex_HCPFT_83810 +wandb: Run `wandb offline` to turn off syncing. +wandb: Resuming run HCPflat_large_gsrFalse__beta_sex_HCPFT +wandb: ⭐️ View project at https://stability.wandb.io/ckadirt/fMRI-foundation-model +wandb: 🚀 View run at https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_large_gsrFalse__beta_sex_HCPFT_83810 + Epoch 1/20 - Training: 0%| | 0/6957 [00:00 + loss = criterion(outputs, labels) + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/loss.py", line 1188, in forward + return F.cross_entropy(input, target, weight=self.weight, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/functional.py", line 3104, in cross_entropy + return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +RuntimeError: 0D or 1D target tensor expected, multi-target not supported diff --git a/541345.out b/541345.out new file mode 100644 index 0000000000000000000000000000000000000000..9b7b1e774ff15592499af01f620190743c9fbbaf --- /dev/null +++ b/541345.out @@ -0,0 +1,15 @@ +NUM_GPUS=1 +MASTER_ADDR=ip-10-0-135-126 +MASTER_PORT=13292 +WORLD_SIZE=1 +PID of this process = 2152625 +------ ARGS ------- + Namespace(model_suffix='beta', hcp_flat_path='/weka/proj-medarc/shared/HCP-Flat', batch_size=256, wandb_log=True, num_epochs=50, lr_scheduler_type='cycle', save_ckpt=False, seed=42, max_lr=1e-05, target='trial_type', num_workers=15, weight_decay=1e-05) +Number of classes: 21 +Input dimension: 737280 +total_steps 21750 +wandb_config: + {'model_name': 'HCPflat_raw_trial_type', 'batch_size': 256, 'weight_decay': 1e-05, 'num_epochs': 50, 'seed': 42, 'lr_scheduler_type': 'cycle', 'save_ckpt': False, 'max_lr': 1e-05, 'target': 'trial_type', 'num_workers': 15} +wandb_id: HCPflat_raw_beta_trial_type_3d482888-f4eb-482f-a3c2-2056642d97e2 +wandb: 🚀 View run HCPflat_raw_beta_trial_type at: https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_raw_beta_trial_type_3d482888-f4eb-482f-a3c2-2056642d97e2 +wandb: Find logs at: wandb/run-20241127_015933-HCPflat_raw_beta_trial_type_3d482888-f4eb-482f-a3c2-2056642d97e2/logs diff --git a/541349.err b/541349.err new file mode 100644 index 0000000000000000000000000000000000000000..ae50e39118251932891e8c80a36a89afa276db82 --- /dev/null +++ b/541349.err @@ -0,0 +1,18 @@ +[NbConvertApp] Converting notebook HCP_downstream_raw_flatmaps.ipynb to python +[NbConvertApp] Writing 36673 bytes to HCP_downstream_raw_flatmaps.py +wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information. +wandb: Currently logged in as: ckadirt. Use `wandb login --relogin` to force relogin +wandb: Tracking run with wandb version 0.18.3 +wandb: Run data is saved locally in /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/wandb/run-20241127_020911-HCPflat_raw_beta_trial_type_81853367-3038-4b91-805f-5066c048cef4 +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run HCPflat_raw_beta_trial_type +wandb: ⭐️ View project at https://stability.wandb.io/ckadirt/fMRI-foundation-model +wandb: 🚀 View run at https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_raw_beta_trial_type_81853367-3038-4b91-805f-5066c048cef4 + Epoch 1/50 - Training: 0%| | 0/435 [00:00 + loss = criterion(outputs, labels.squeeze()) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/loss.py", line 734, in forward + return F.binary_cross_entropy_with_logits(input, target, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/functional.py", line 3242, in binary_cross_entropy_with_logits + raise ValueError(f"Target size ({target.size()}) must be the same as input size ({input.size()})") +ValueError: Target size (torch.Size([16])) must be the same as input size (torch.Size([16, 1])) diff --git a/541363.out b/541363.out new file mode 100644 index 0000000000000000000000000000000000000000..3dc3bc3b55644f7ec4126179751e9caaf35b995d --- /dev/null +++ b/541363.out @@ -0,0 +1,70 @@ +NUM_GPUS=1 +MASTER_ADDR=ip-10-0-133-32 +MASTER_PORT=17075 +WORLD_SIZE=1 +------ ARGS ------- + Namespace(found_model_name='NSDflat_large_gsrFalse_', epoch_checkpoint='epoch99.pth', model_suffix='beta', hcp_flat_path='/weka/proj-medarc/shared/HCP-Flat', batch_size=16, wandb_log=True, num_epochs=20, lr_scheduler_type='cycle', save_ckpt=False, seed=42, max_lr=0.0001, target='sex', num_workers=10, weight_decay=1e-05, global_pool=True) +outdir /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/NSDflat_large_gsrFalse_ +Loaded config.yaml from ckpt folder /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/NSDflat_large_gsrFalse_ + +__CONFIG__ +base_lr = 0.001 +batch_size = 32 +ckpt_interval = 5 +ckpt_saving = True +cls_embed = True +contrastive_loss_weight = 1.0 +datasets_to_include = NSD +decoder_embed_dim = 512 +grad_accumulation_steps = 1 +grad_clip = 1.0 +gsr = False +hcp_flat_path = /weka/proj-medarc/shared/HCP-Flat +mask_ratio = 0.75 +model_name = NSDflat_large_gsrFalse_ +no_qkv_bias = False +norm_pix_loss = False +nsd_flat_path = /weka/proj-medarc/shared/NSD-Flat +num_epochs = 100 +num_frames = 16 +num_samples_per_epoch = 200000 +num_workers = 10 +patch_size = 16 +pct_masks_to_decode = 1 +plotting = True +pred_t_dim = 8 +print_interval = 20 +probe_base_lr = 0.0003 +probe_batch_size = 8 +probe_num_epochs = 30 +probe_num_samples_per_epoch = 100000 +resume_from_ckpt = True +seed = 42 +sep_pos_embed = True +t_patch_size = 2 +test_num_samples_per_epoch = 50000 +test_set = False +trunc_init = False +use_contrastive_loss = False +wandb_log = True + + +WORLD_SIZE=1 +PID of this process = 2739771 +global_pool = True +gsr = False +Creating datasets +Datasets ready +img_size (144, 320) patch_size (16, 16) frames 16 t_patch_size 2 +model initialized +latest_checkpoint: epoch99.pth + +Loaded checkpoint epoch99.pth from /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/NSDflat_large_gsrFalse_ + +Input dimension: 1024 +total_steps 139140 +wandb_config: + {'model_name': 'NSDflat_large_gsrFalse__HCP_FT_sex', 'batch_size': 16, 'weight_decay': 1e-05, 'num_epochs': 20, 'seed': 42, 'lr_scheduler_type': 'cycle', 'save_ckpt': False, 'max_lr': 0.0001, 'target': 'sex', 'num_workers': 10} +wandb_id: NSDflat_large_gsrFalse__beta_sex_HCPFT_de7deaf6-6880-4bbb-875a-3b62210aceaa +wandb: 🚀 View run NSDflat_large_gsrFalse__beta_sex_HCPFT at: https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/NSDflat_large_gsrFalse__beta_sex_HCPFT_de7deaf6-6880-4bbb-875a-3b62210aceaa +wandb: Find logs at: wandb/run-20241127_093951-NSDflat_large_gsrFalse__beta_sex_HCPFT_de7deaf6-6880-4bbb-875a-3b62210aceaa/logs diff --git a/541473.err b/541473.err new file mode 100644 index 0000000000000000000000000000000000000000..5c1a16b1ee69675105b2670a4e57cd05415320b7 --- /dev/null +++ b/541473.err @@ -0,0 +1,16 @@ +[NbConvertApp] Converting notebook HCP_downstream_raw_flatmaps.ipynb to python +[NbConvertApp] Writing 36675 bytes to HCP_downstream_raw_flatmaps.py +wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information. +wandb: Currently logged in as: ckadirt. Use `wandb login --relogin` to force relogin +wandb: Tracking run with wandb version 0.18.3 +wandb: Run data is saved locally in /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/wandb/run-20241127_125303-HCPflat_raw_beta_trial_type_a0e1e642-966f-441b-9bc7-974dce26cba1 +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run HCPflat_raw_beta_trial_type +wandb: ⭐️ View project at https://stability.wandb.io/ckadirt/fMRI-foundation-model +wandb: 🚀 View run at https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_raw_beta_trial_type_a0e1e642-966f-441b-9bc7-974dce26cba1 + Epoch 1/50 - Training: 0%| | 0/435 [00:00 + loss = criterion(outputs, labels.squeeze()) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl + return forward_call(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/modules/loss.py", line 734, in forward + return F.binary_cross_entropy_with_logits(input, target, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/admin/home-ckadirt/foundation_env/lib/python3.11/site-packages/torch/nn/functional.py", line 3242, in binary_cross_entropy_with_logits + raise ValueError(f"Target size ({target.size()}) must be the same as input size ({input.size()})") +ValueError: Target size (torch.Size([16])) must be the same as input size (torch.Size([16, 1])) diff --git a/541519.out b/541519.out new file mode 100644 index 0000000000000000000000000000000000000000..cb08536c25725e517910aae65a02dd5244c8f6ed --- /dev/null +++ b/541519.out @@ -0,0 +1,70 @@ +NUM_GPUS=1 +MASTER_ADDR=ip-10-0-142-24 +MASTER_PORT=16683 +WORLD_SIZE=1 +------ ARGS ------- + Namespace(found_model_name='HCPflat_large_gsrFalse_', epoch_checkpoint='epoch99.pth', model_suffix='beta', hcp_flat_path='/weka/proj-medarc/shared/HCP-Flat', batch_size=16, wandb_log=True, num_epochs=20, lr_scheduler_type='cycle', save_ckpt=False, seed=42, max_lr=3e-05, target='sex', num_workers=10, weight_decay=0.01, global_pool=True) +outdir /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ +Loaded config.yaml from ckpt folder /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ + +__CONFIG__ +base_lr = 0.001 +batch_size = 32 +ckpt_interval = 5 +ckpt_saving = True +cls_embed = True +contrastive_loss_weight = 1.0 +datasets_to_include = HCP +decoder_embed_dim = 512 +grad_accumulation_steps = 1 +grad_clip = 1.0 +gsr = False +hcp_flat_path = /weka/proj-medarc/shared/HCP-Flat +mask_ratio = 0.75 +model_name = HCPflat_large_gsrFalse_ +no_qkv_bias = False +norm_pix_loss = False +nsd_flat_path = /weka/proj-medarc/shared/NSD-Flat +num_epochs = 100 +num_frames = 16 +num_samples_per_epoch = 200000 +num_workers = 10 +patch_size = 16 +pct_masks_to_decode = 1 +plotting = True +pred_t_dim = 8 +print_interval = 20 +probe_base_lr = 0.0003 +probe_batch_size = 8 +probe_num_epochs = 30 +probe_num_samples_per_epoch = 100000 +resume_from_ckpt = True +seed = 42 +sep_pos_embed = True +t_patch_size = 2 +test_num_samples_per_epoch = 50000 +test_set = False +trunc_init = False +use_contrastive_loss = False +wandb_log = True + + +WORLD_SIZE=1 +PID of this process = 2723140 +global_pool = True +gsr = False +Creating datasets +Datasets ready +img_size (144, 320) patch_size (16, 16) frames 16 t_patch_size 2 +model initialized +latest_checkpoint: epoch99.pth + +Loaded checkpoint epoch99.pth from /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/checkpoints/HCPflat_large_gsrFalse_ + +Input dimension: 1024 +total_steps 139140 +wandb_config: + {'model_name': 'HCPflat_large_gsrFalse__HCP_FT_sex', 'batch_size': 16, 'weight_decay': 0.01, 'num_epochs': 20, 'seed': 42, 'lr_scheduler_type': 'cycle', 'save_ckpt': False, 'max_lr': 3e-05, 'target': 'sex', 'num_workers': 10} +wandb_id: HCPflat_large_gsrFalse__beta_sex_HCPFT_688eeb4d-cc29-4131-8a10-baa48809d695 +wandb: 🚀 View run HCPflat_large_gsrFalse__beta_sex_HCPFT at: https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_large_gsrFalse__beta_sex_HCPFT_688eeb4d-cc29-4131-8a10-baa48809d695 +wandb: Find logs at: wandb/run-20241127_145229-HCPflat_large_gsrFalse__beta_sex_HCPFT_688eeb4d-cc29-4131-8a10-baa48809d695/logs diff --git a/541537.err b/541537.err new file mode 100644 index 0000000000000000000000000000000000000000..96858b4cdfb57965b5fe9a2484f87fd052848f51 --- /dev/null +++ b/541537.err @@ -0,0 +1,27 @@ +[NbConvertApp] Converting notebook HCP_downstream_finetune.ipynb to python +[NbConvertApp] Writing 32003 bytes to HCP_downstream_finetune.py +/weka/proj-fmri/ckadirt/fMRI-foundation-model/src/HCP_downstream_finetune.py:658: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature. + state = torch.load(checkpoint_path) +wandb: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information. +wandb: Currently logged in as: ckadirt. Use `wandb login --relogin` to force relogin +wandb: Tracking run with wandb version 0.18.3 +wandb: Run data is saved locally in /weka/proj-fmri/ckadirt/fMRI-foundation-model/src/wandb/run-20241127_145722-HCPflat_large_gsrFalse__beta_sex_HCPFT_2650ad5d-38cf-40f1-81a2-a7df17c52ddd +wandb: Run `wandb offline` to turn off syncing. +wandb: Syncing run HCPflat_large_gsrFalse__beta_sex_HCPFT +wandb: ⭐️ View project at https://stability.wandb.io/ckadirt/fMRI-foundation-model +wandb: 🚀 View run at https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_large_gsrFalse__beta_sex_HCPFT_2650ad5d-38cf-40f1-81a2-a7df17c52ddd + Epoch 1/20 - Training: 0%| | 0/6957 [00:00