|
NUM_GPUS=1 |
|
MASTER_ADDR=ip-10-0-131-135 |
|
MASTER_PORT=18962 |
|
WORLD_SIZE=1 |
|
PID of this process = 1324243 |
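
The header above records a single-process, single-GPU run (NUM_GPUS=1, WORLD_SIZE=1). A minimal sketch of how such a script might consume these environment variables to initialize torch.distributed (assuming PyTorch with an NCCL backend; the actual launch code is not shown in the log):

```python
import os

import torch.distributed as dist

# Rendezvous info as printed in the header above. With WORLD_SIZE=1 this
# forms a trivial single-process group, but it keeps the same code path
# as multi-GPU launches.
master_addr = os.environ.get("MASTER_ADDR", "localhost")
master_port = os.environ.get("MASTER_PORT", "12355")
world_size = int(os.environ.get("WORLD_SIZE", "1"))
rank = int(os.environ.get("RANK", "0"))

dist.init_process_group(
    backend="nccl",  # assumption; "gloo" would be used for CPU-only runs
    init_method=f"tcp://{master_addr}:{master_port}",
    world_size=world_size,
    rank=rank,
)
```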
|
------ ARGS ------- |
|
Namespace(model_suffix='beta', hcp_flat_path='/weka/proj-medarc/shared/HCP-Flat', batch_size=128, wandb_log=True, num_epochs=20, lr_scheduler_type='cycle', save_ckpt=False, seed=42, max_lr=0.001, target='age', num_workers=15, weight_decay=1e-05) |
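
A parser that would reproduce the Namespace printed above might look like the following (argument names and types are inferred from the printed values; the defaults and boolean-flag handling are guesses, since the original parser is not in the log):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model_suffix", type=str, default="beta")
parser.add_argument("--hcp_flat_path", type=str,
                    default="/weka/proj-medarc/shared/HCP-Flat")
parser.add_argument("--batch_size", type=int, default=128)
parser.add_argument("--wandb_log", action="store_true")   # True in this run
parser.add_argument("--num_epochs", type=int, default=20)
parser.add_argument("--lr_scheduler_type", type=str, default="cycle")
parser.add_argument("--save_ckpt", action="store_true")   # False in this run
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--max_lr", type=float, default=1e-3)
parser.add_argument("--target", type=str, default="age")
parser.add_argument("--num_workers", type=int, default=15)
parser.add_argument("--weight_decay", type=float, default=1e-5)
args = parser.parse_args()
```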
|
Input dimension: 737280 |
|
total_steps 17400 |
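
Two consistency checks on the numbers above: 737,280 factors as 144 × 320 × 16, consistent with flattening 16 frames of a 144×320 HCP-Flat map into a single input vector (the exact layout is not shown in the log), and 17,400 is exactly 870 steps/epoch × 20 epochs. Given lr_scheduler_type='cycle', a OneCycleLR scheduler over that horizon is the natural reading; a sketch under that assumption:

```python
import torch
import torch.nn as nn

# 870 optimizer steps per epoch (see the "Step [N/870]" lines) x 20 epochs.
steps_per_epoch = 870
num_epochs = 20
total_steps = steps_per_epoch * num_epochs  # = 17400, matching the log

# Hypothetical linear probe: 737280 flattened inputs -> 1 regression output.
model = nn.Linear(737280, 1)
# Optimizer family is a guess; lr/weight_decay come from the printed args.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=1e-3, total_steps=total_steps
)
```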
|
wandb_config: |
|
{'model_name': 'HCPflat_raw_age', 'batch_size': 128, 'weight_decay': 1e-05, 'num_epochs': 20, 'seed': 42, 'lr_scheduler_type': 'cycle', 'save_ckpt': False, 'max_lr': 0.001, 'target': 'age', 'num_workers': 15} |
|
wandb_id: HCPflat_raw_beta_age_9a3e14f1-ec90-47c9-a06e-a395872f2271 |
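
With wandb_log=True, the config and run id above would be passed to wandb.init roughly as follows (a sketch; the entity and project names are taken from the run URL at the bottom of the log, and the resume behavior is an assumption):

```python
import wandb

wandb_config = {
    "model_name": "HCPflat_raw_age",
    "batch_size": 128,
    "weight_decay": 1e-05,
    "num_epochs": 20,
    "seed": 42,
    "lr_scheduler_type": "cycle",
    "save_ckpt": False,
    "max_lr": 0.001,
    "target": "age",
    "num_workers": 15,
}

wandb.init(
    entity="ckadirt",                  # from the run URL below
    project="fMRI-foundation-model",   # from the run URL below
    name="HCPflat_raw_beta_age",
    id="HCPflat_raw_beta_age_9a3e14f1-ec90-47c9-a06e-a395872f2271",
    config=wandb_config,
    resume="allow",                    # assumption
)
```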
|
Step [100/870] - Training Loss: 0.7004 - Training MSE: 98.7605 |
|
Step [200/870] - Training Loss: 0.9553 - Training MSE: 104.1519 |
|
Step [300/870] - Training Loss: 1.9018 - Training MSE: 129.4920 |
|
Step [400/870] - Training Loss: 5.3471 - Training MSE: 222.5458 |
|
Step [500/870] - Training Loss: 15.9923 - Training MSE: 460.4371 |
|
Step [600/870] - Training Loss: 32.0503 - Training MSE: 927.5124 |
|
Step [700/870] - Training Loss: 70.9650 - Training MSE: 1574.1360 |
|
Step [800/870] - Training Loss: 80.0552 - Training MSE: 2592.0538 |
|
Epoch [1/20] - Training Loss: 27.4343, Training MSE: 3507.0528 - Validation Loss: 131.3325, Validation MSE: 16734.0162 |
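
From here the log repeats the same cadence: running training metrics printed every 100 steps, then an epoch summary with a validation pass. A minimal loop reproducing that cadence (a sketch: the MSE column is assumed to be tracked on the raw target scale via y_std while the loss is the optimized objective on normalized targets, which would explain the two columns differing by a large factor; none of this is shown in the log):

```python
import torch
import torch.nn as nn

def train_one_epoch(model, train_loader, val_loader, optimizer, scheduler,
                    epoch, num_epochs, device, y_std=1.0):
    """Hypothetical epoch loop matching the log's print cadence."""
    criterion = nn.MSELoss()
    model.train()
    losses, sq_err, n = [], 0.0, 0
    for step, (x, y) in enumerate(train_loader, start=1):
        x, y = x.to(device), y.to(device)
        pred = model(x).squeeze(-1)
        loss = criterion(pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()  # OneCycleLR advances once per optimizer step
        losses.append(loss.item())
        sq_err += ((pred - y) * y_std).pow(2).sum().item()  # raw-scale error
        n += y.numel()
        if step % 100 == 0:
            print(f"Step [{step}/{len(train_loader)}] - "
                  f"Training Loss: {loss.item():.4f} - "
                  f"Training MSE: {sq_err / n:.4f}")

    model.eval()
    val_loss, val_sq_err, val_n = 0.0, 0.0, 0
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(device), y.to(device)
            pred = model(x).squeeze(-1)
            val_loss += criterion(pred, y).item()
            val_sq_err += ((pred - y) * y_std).pow(2).sum().item()
            val_n += y.numel()
    print(f"Epoch [{epoch}/{num_epochs}] - "
          f"Training Loss: {sum(losses) / len(losses):.4f}, "
          f"Training MSE: {sq_err / n:.4f} - "
          f"Validation Loss: {val_loss / len(val_loader):.4f}, "
          f"Validation MSE: {val_sq_err / val_n:.4f}")
```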
|
Step [100/870] - Training Loss: 225.2205 - Training MSE: 30714.7531 |
|
Step [200/870] - Training Loss: 292.3234 - Training MSE: 33573.5262 |
|
Step [300/870] - Training Loss: 292.7283 - Training MSE: 36038.9369 |
|
Step [400/870] - Training Loss: 299.2356 - Training MSE: 37454.6156 |
|
Step [500/870] - Training Loss: 443.3794 - Training MSE: 38764.3162 |
|
Step [600/870] - Training Loss: 427.7180 - Training MSE: 41074.9210 |
|
Step [700/870] - Training Loss: 394.7606 - Training MSE: 42823.9646 |
|
Step [800/870] - Training Loss: 427.3995 - Training MSE: 44576.2413 |
|
Epoch [2/20] - Training Loss: 355.6319, Training MSE: 45505.3376 - Validation Loss: 424.2622, Validation MSE: 54209.6258 |
|
Step [100/870] - Training Loss: 616.6658 - Training MSE: 91859.6765 |
|
Step [200/870] - Training Loss: 594.7969 - Training MSE: 88332.5994 |
|
Step [300/870] - Training Loss: 599.6354 - Training MSE: 89295.5247 |
|
Step [400/870] - Training Loss: 575.7101 - Training MSE: 88032.4892 |
|
Step [500/870] - Training Loss: 503.4290 - Training MSE: 87341.0175 |
|
Step [600/870] - Training Loss: 449.5532 - Training MSE: 86012.6966 |
|
Step [700/870] - Training Loss: 693.0297 - Training MSE: 85016.7206 |
|
Step [800/870] - Training Loss: 551.1360 - Training MSE: 83246.9525 |
|
Epoch [3/20] - Training Loss: 641.0600, Training MSE: 82029.1618 - Validation Loss: 380.8437, Validation MSE: 48652.5277 |
|
Step [100/870] - Training Loss: 595.5380 - Training MSE: 66653.8484 |
|
Step [200/870] - Training Loss: 620.8188 - Training MSE: 64999.0662 |
|
Step [300/870] - Training Loss: 814.8269 - Training MSE: 64970.9618 |
|
Step [400/870] - Training Loss: 465.9383 - Training MSE: 65667.7809 |
|
Step [500/870] - Training Loss: 468.5602 - Training MSE: 64540.3150 |
|
Step [600/870] - Training Loss: 567.9731 - Training MSE: 63776.4097 |
|
Step [700/870] - Training Loss: 458.1937 - Training MSE: 62471.3650 |
|
Step [800/870] - Training Loss: 486.8877 - Training MSE: 61673.2599 |
|
Epoch [4/20] - Training Loss: 475.5952, Training MSE: 60863.9278 - Validation Loss: 230.0277, Validation MSE: 29384.7977 |
|
Step [100/870] - Training Loss: 312.2596 - Training MSE: 43578.8527 |
|
Step [200/870] - Training Loss: 289.8408 - Training MSE: 43494.8653 |
|
Step [300/870] - Training Loss: 367.2291 - Training MSE: 42418.5304 |
|
Step [400/870] - Training Loss: 367.6751 - Training MSE: 42794.9546 |
|
Step [500/870] - Training Loss: 250.6227 - Training MSE: 42133.1672 |
|
Step [600/870] - Training Loss: 224.6889 - Training MSE: 41366.5701 |
|
Step [700/870] - Training Loss: 297.7198 - Training MSE: 40742.5247 |
|
Step [800/870] - Training Loss: 285.0359 - Training MSE: 40988.5822 |
|
Epoch [5/20] - Training Loss: 320.3213, Training MSE: 40992.2368 - Validation Loss: 343.8250, Validation MSE: 43935.6901 |
|
Step [100/870] - Training Loss: 403.4899 - Training MSE: 41564.5185 |
|
Step [200/870] - Training Loss: 280.5116 - Training MSE: 38702.8446 |
|
Step [300/870] - Training Loss: 197.1333 - Training MSE: 37270.6040 |
|
Step [400/870] - Training Loss: 219.3125 - Training MSE: 36088.7097 |
|
Step [500/870] - Training Loss: 214.0476 - Training MSE: 36101.8102 |
|
Step [600/870] - Training Loss: 249.8342 - Training MSE: 35435.7052 |
|
Step [700/870] - Training Loss: 253.3519 - Training MSE: 35137.2864 |
|
Step [800/870] - Training Loss: 260.9366 - Training MSE: 35056.1371 |
|
Epoch [6/20] - Training Loss: 272.7941, Training MSE: 34906.8847 - Validation Loss: 181.5033, Validation MSE: 23178.8234 |
|
Step [100/870] - Training Loss: 259.5545 - Training MSE: 41335.3267 |
|
Step [200/870] - Training Loss: 267.8753 - Training MSE: 38545.9863 |
|
Step [300/870] - Training Loss: 279.0856 - Training MSE: 38170.9948 |
|
Step [400/870] - Training Loss: 253.6445 - Training MSE: 36542.9225 |
|
Step [500/870] - Training Loss: 270.2076 - Training MSE: 35589.3113 |
|
Step [600/870] - Training Loss: 238.1767 - Training MSE: 34781.3647 |
|
Step [700/870] - Training Loss: 210.1095 - Training MSE: 34110.7147 |
|
Step [800/870] - Training Loss: 170.0861 - Training MSE: 33597.5924 |
|
Epoch [7/20] - Training Loss: 260.1132, Training MSE: 33286.1531 - Validation Loss: 149.1776, Validation MSE: 19066.1705 |
|
Step [100/870] - Training Loss: 213.6669 - Training MSE: 30238.3354 |
|
Step [200/870] - Training Loss: 280.7238 - Training MSE: 37416.2637 |
|
Step [300/870] - Training Loss: 199.6157 - Training MSE: 35362.4516 |
|
Step [400/870] - Training Loss: 293.6560 - Training MSE: 34585.9442 |
|
Step [500/870] - Training Loss: 243.5896 - Training MSE: 33888.9126 |
|
Step [600/870] - Training Loss: 250.7629 - Training MSE: 33711.0958 |
|
Step [700/870] - Training Loss: 204.3937 - Training MSE: 33619.7849 |
|
Step [800/870] - Training Loss: 297.2823 - Training MSE: 32938.8256 |
|
Epoch [8/20] - Training Loss: 253.8449, Training MSE: 32483.8266 - Validation Loss: 138.4049, Validation MSE: 17682.2886 |
|
Step [100/870] - Training Loss: 185.7414 - Training MSE: 26811.0968 |
|
Step [200/870] - Training Loss: 218.8939 - Training MSE: 26100.9550 |
|
Step [300/870] - Training Loss: 162.8873 - Training MSE: 26417.2719 |
|
Step [400/870] - Training Loss: 161.6969 - Training MSE: 25955.3044 |
|
Step [500/870] - Training Loss: 207.4261 - Training MSE: 25493.3734 |
|
Step [600/870] - Training Loss: 187.3801 - Training MSE: 25508.5498 |
|
Step [700/870] - Training Loss: 214.4618 - Training MSE: 25381.5527 |
|
Step [800/870] - Training Loss: 180.7133 - Training MSE: 25397.3536 |
|
Epoch [9/20] - Training Loss: 196.7826, Training MSE: 25180.1054 - Validation Loss: 116.9404, Validation MSE: 14922.8448 |
|
Step [100/870] - Training Loss: 156.8096 - Training MSE: 21927.3050 |
|
Step [200/870] - Training Loss: 150.6434 - Training MSE: 20166.1289 |
|
Step [300/870] - Training Loss: 156.5338 - Training MSE: 19752.6387 |
|
Step [400/870] - Training Loss: 115.4449 - Training MSE: 19134.1077 |
|
Step [500/870] - Training Loss: 151.5466 - Training MSE: 18829.4417 |
|
Step [600/870] - Training Loss: 133.8123 - Training MSE: 19151.8094 |
|
Step [700/870] - Training Loss: 150.4475 - Training MSE: 19053.8740 |
|
Step [800/870] - Training Loss: 134.3665 - Training MSE: 18898.9213 |
|
Epoch [10/20] - Training Loss: 145.9114, Training MSE: 18671.6165 - Validation Loss: 84.6188, Validation MSE: 10794.5938 |
|
Step [100/870] - Training Loss: 110.3017 - Training MSE: 14012.7743 |
|
Step [200/870] - Training Loss: 89.6949 - Training MSE: 13419.7123 |
|
Step [300/870] - Training Loss: 99.2646 - Training MSE: 13079.7575 |
|
Step [400/870] - Training Loss: 93.9918 - Training MSE: 12765.8371 |
|
Step [500/870] - Training Loss: 88.8151 - Training MSE: 12486.5892 |
|
Step [600/870] - Training Loss: 143.9832 - Training MSE: 12778.8930 |
|
Step [700/870] - Training Loss: 98.1826 - Training MSE: 12722.4271 |
|
Step [800/870] - Training Loss: 69.2268 - Training MSE: 12515.3702 |
|
Epoch [11/20] - Training Loss: 97.2335, Training MSE: 12443.2557 - Validation Loss: 49.3342, Validation MSE: 6300.2935 |
|
Step [100/870] - Training Loss: 84.1289 - Training MSE: 12731.3686 |
|
Step [200/870] - Training Loss: 101.6131 - Training MSE: 11380.9970 |
|
Step [300/870] - Training Loss: 68.8949 - Training MSE: 10543.4813 |
|
Step [400/870] - Training Loss: 67.8553 - Training MSE: 9954.5183 |
|
Step [500/870] - Training Loss: 69.3201 - Training MSE: 9525.7463 |
|
Step [600/870] - Training Loss: 61.4179 - Training MSE: 9232.0457 |
|
Step [700/870] - Training Loss: 58.9125 - Training MSE: 8939.9569 |
|
Step [800/870] - Training Loss: 61.9412 - Training MSE: 8919.2549 |
|
Epoch [12/20] - Training Loss: 68.8162, Training MSE: 8806.7756 - Validation Loss: 34.1533, Validation MSE: 4359.8410 |
|
Step [100/870] - Training Loss: 39.4785 - Training MSE: 5607.7454 |
|
Step [200/870] - Training Loss: 43.6887 - Training MSE: 5866.7379 |
|
Step [300/870] - Training Loss: 51.5316 - Training MSE: 5913.9918 |
|
Step [400/870] - Training Loss: 44.5591 - Training MSE: 5827.7686 |
|
Step [500/870] - Training Loss: 56.4356 - Training MSE: 5818.0397 |
|
Step [600/870] - Training Loss: 64.3104 - Training MSE: 6190.6625 |
|
Step [700/870] - Training Loss: 45.7098 - Training MSE: 6203.7776 |
|
Step [800/870] - Training Loss: 37.0419 - Training MSE: 6119.6922 |
|
Epoch [13/20] - Training Loss: 47.2533, Training MSE: 6047.1326 - Validation Loss: 24.4559, Validation MSE: 3123.7767 |
|
Step [100/870] - Training Loss: 24.0725 - Training MSE: 4543.2675 |
|
Step [200/870] - Training Loss: 33.1887 - Training MSE: 4195.8679 |
|
Step [300/870] - Training Loss: 27.0619 - Training MSE: 4041.4841 |
|
Step [400/870] - Training Loss: 39.4217 - Training MSE: 4002.0279 |
|
Step [500/870] - Training Loss: 31.7641 - Training MSE: 4235.0661 |
|
Step [600/870] - Training Loss: 25.3477 - Training MSE: 4154.7053 |
|
Step [700/870] - Training Loss: 28.7334 - Training MSE: 4054.2543 |
|
Step [800/870] - Training Loss: 25.1178 - Training MSE: 3951.9298 |
|
Epoch [14/20] - Training Loss: 30.4371, Training MSE: 3894.8694 - Validation Loss: 15.2119, Validation MSE: 1942.3499 |
|
Step [100/870] - Training Loss: 19.7150 - Training MSE: 2138.6110 |
|
Step [200/870] - Training Loss: 14.8900 - Training MSE: 2099.8135 |
|
Step [300/870] - Training Loss: 14.9437 - Training MSE: 2029.9849 |
|
Step [400/870] - Training Loss: 16.5218 - Training MSE: 1988.6470 |
|
Step [500/870] - Training Loss: 13.5388 - Training MSE: 1959.2617 |
|
Step [600/870] - Training Loss: 12.7145 - Training MSE: 1932.0582 |
|
Step [700/870] - Training Loss: 15.4000 - Training MSE: 1935.8872 |
|
Step [800/870] - Training Loss: 12.4422 - Training MSE: 2013.0126 |
|
Epoch [15/20] - Training Loss: 15.5981, Training MSE: 1996.1209 - Validation Loss: 7.5625, Validation MSE: 966.0334 |
|
Step [100/870] - Training Loss: 10.2901 - Training MSE: 2687.8184 |
|
Step [200/870] - Training Loss: 11.1747 - Training MSE: 1969.2730 |
|
Step [300/870] - Training Loss: 6.5812 - Training MSE: 1654.0797 |
|
Step [400/870] - Training Loss: 7.1049 - Training MSE: 1479.5801 |
|
Step [500/870] - Training Loss: 6.1494 - Training MSE: 1376.3151 |
|
Step [600/870] - Training Loss: 8.5310 - Training MSE: 1297.4019 |
|
Step [700/870] - Training Loss: 7.0269 - Training MSE: 1241.7873 |
|
Step [800/870] - Training Loss: 7.1375 - Training MSE: 1190.3315 |
|
Epoch [16/20] - Training Loss: 9.0716, Training MSE: 1160.8802 - Validation Loss: 3.7649, Validation MSE: 481.1624 |
|
Step [100/870] - Training Loss: 3.1061 - Training MSE: 448.2485 |
|
Step [200/870] - Training Loss: 2.6831 - Training MSE: 443.6960 |
|
Step [300/870] - Training Loss: 3.3528 - Training MSE: 438.0512 |
|
Step [400/870] - Training Loss: 9.4740 - Training MSE: 675.8739 |
|
Step [500/870] - Training Loss: 4.1480 - Training MSE: 691.1889 |
|
Step [600/870] - Training Loss: 4.2169 - Training MSE: 668.9939 |
|
Step [700/870] - Training Loss: 3.0901 - Training MSE: 646.7708 |
|
Step [800/870] - Training Loss: 5.0069 - Training MSE: 628.2751 |
|
Epoch [17/20] - Training Loss: 4.8296, Training MSE: 617.9883 - Validation Loss: 2.7624, Validation MSE: 352.8308 |
|
Step [100/870] - Training Loss: 2.7670 - Training MSE: 320.1880 |
|
Step [200/870] - Training Loss: 1.6207 - Training MSE: 287.7513 |
|
Step [300/870] - Training Loss: 1.9942 - Training MSE: 275.3315 |
|
Step [400/870] - Training Loss: 2.2564 - Training MSE: 270.3133 |
|
Step [500/870] - Training Loss: 1.6254 - Training MSE: 266.9202 |
|
Step [600/870] - Training Loss: 1.7127 - Training MSE: 264.0805 |
|
Step [700/870] - Training Loss: 1.5853 - Training MSE: 261.9605 |
|
Step [800/870] - Training Loss: 1.9506 - Training MSE: 261.0231 |
|
Epoch [18/20] - Training Loss: 2.0329, Training MSE: 260.1193 - Validation Loss: 1.7839, Validation MSE: 227.7603 |
|
Step [100/870] - Training Loss: 1.1409 - Training MSE: 143.1174 |
|
Step [200/870] - Training Loss: 1.1094 - Training MSE: 152.5059 |
|
Step [300/870] - Training Loss: 1.2674 - Training MSE: 157.0374 |
|
Step [400/870] - Training Loss: 1.0359 - Training MSE: 155.5929 |
|
Step [500/870] - Training Loss: 1.1091 - Training MSE: 155.0004 |
|
Step [600/870] - Training Loss: 1.1539 - Training MSE: 155.3943 |
|
Step [700/870] - Training Loss: 1.2641 - Training MSE: 155.1105 |
|
Step [800/870] - Training Loss: 1.2053 - Training MSE: 155.1447 |
|
Epoch [19/20] - Training Loss: 1.2139, Training MSE: 155.3429 - Validation Loss: 1.5769, Validation MSE: 201.3158 |
|
Step [100/870] - Training Loss: 1.0962 - Training MSE: 117.9358 |
|
Step [200/870] - Training Loss: 0.8717 - Training MSE: 117.8234 |
|
Step [300/870] - Training Loss: 0.8542 - Training MSE: 118.7729 |
|
Step [400/870] - Training Loss: 1.0950 - Training MSE: 119.2174 |
|
Step [500/870] - Training Loss: 1.1080 - Training MSE: 119.1448 |
|
Step [600/870] - Training Loss: 1.0546 - Training MSE: 118.9895 |
|
Step [700/870] - Training Loss: 0.8502 - Training MSE: 119.4526 |
|
Step [800/870] - Training Loss: 0.7913 - Training MSE: 119.7514 |
|
Epoch [20/20] - Training Loss: 0.9381, Training MSE: 120.0448 - Validation Loss: 1.5444, Validation MSE: 197.2017 |
|
wandb: 🚀 View run HCPflat_raw_beta_age at: https://stability.wandb.io/ckadirt/fMRI-foundation-model/runs/HCPflat_raw_beta_age_9a3e14f1-ec90-47c9-a06e-a395872f2271

wandb: Find logs at: wandb/run-20241126_221003-HCPflat_raw_beta_age_9a3e14f1-ec90-47c9-a06e-a395872f2271/logs
|
|