|
[2024-11-06 10:44:30,632] INFO: [0;35mWill use torch.nn.parallel.DistributedDataParallel() and 8 gpus[0m |
|
[2024-11-06 10:44:30,636] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-06 10:44:30,636] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-06 10:44:30,637] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-06 10:44:30,637] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-06 10:44:30,637] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-06 10:44:30,637] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-06 10:44:30,637] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-06 10:44:30,637] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-06 10:44:34,659] INFO: configured dtype=torch.bfloat16 for autocast |
|
[2024-11-06 10:44:36,968] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,008] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,048] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,088] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,127] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,167] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,206] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,247] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,287] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,327] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,366] INFO: using attention_type=math |
|
[2024-11-06 10:44:37,406] INFO: using attention_type=math |
|
[2024-11-06 10:44:42,252] INFO: DistributedDataParallel( |
|
(module): MLPF( |
|
(nn0_id): ModuleList( |
|
(0-1): 2 x Sequential( |
|
(0): Linear(in_features=17, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=1024, bias=True) |
|
) |
|
) |
|
(nn0_reg): ModuleList( |
|
(0-1): 2 x Sequential( |
|
(0): Linear(in_features=17, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=1024, bias=True) |
|
) |
|
) |
|
(conv_id): ModuleList( |
|
(0-5): 6 x PreLnSelfAttentionLayer( |
|
(mha): MultiheadAttention( |
|
(out_proj): NonDynamicallyQuantizableLinear(in_features=1024, out_features=1024, bias=True) |
|
) |
|
(norm0): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(seq): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): Linear(in_features=1024, out_features=1024, bias=True) |
|
(3): ReLU() |
|
) |
|
(dropout): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
(conv_reg): ModuleList( |
|
(0-5): 6 x PreLnSelfAttentionLayer( |
|
(mha): MultiheadAttention( |
|
(out_proj): NonDynamicallyQuantizableLinear(in_features=1024, out_features=1024, bias=True) |
|
) |
|
(norm0): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(seq): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): Linear(in_features=1024, out_features=1024, bias=True) |
|
(3): ReLU() |
|
) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(nn_binary_particle): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=2, bias=True) |
|
) |
|
(nn_pid): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=6, bias=True) |
|
) |
|
(nn_pt): RegressionOutput( |
|
(nn): ModuleList( |
|
(0-1): 2 x Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=1, bias=True) |
|
) |
|
) |
|
) |
|
(nn_eta): RegressionOutput( |
|
(nn): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=2, bias=True) |
|
) |
|
) |
|
(nn_sin_phi): RegressionOutput( |
|
(nn): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=2, bias=True) |
|
) |
|
) |
|
(nn_cos_phi): RegressionOutput( |
|
(nn): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=2, bias=True) |
|
) |
|
) |
|
(nn_energy): RegressionOutput( |
|
(nn): ModuleList( |
|
(0-1): 2 x Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=1, bias=True) |
|
) |
|
) |
|
) |
|
(final_norm_id): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(final_norm_reg): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
) |
|
) |
|
[2024-11-06 10:44:42,253] INFO: Trainable parameters: 89388050 |
|
[2024-11-06 10:44:42,253] INFO: Non-trainable parameters: 0 |
|
[2024-11-06 10:44:42,253] INFO: Total parameters: 89388050 |
|
[2024-11-06 10:44:42,257] INFO: Modules Trainable parameters Non-trainable parameters |
|
module.nn0_id.0.0.weight 17408 0 |
|
module.nn0_id.0.0.bias 1024 0 |
|
module.nn0_id.0.2.weight 1024 0 |
|
module.nn0_id.0.2.bias 1024 0 |
|
module.nn0_id.0.4.weight 1048576 0 |
|
module.nn0_id.0.4.bias 1024 0 |
|
module.nn0_id.1.0.weight 17408 0 |
|
module.nn0_id.1.0.bias 1024 0 |
|
module.nn0_id.1.2.weight 1024 0 |
|
module.nn0_id.1.2.bias 1024 0 |
|
module.nn0_id.1.4.weight 1048576 0 |
|
module.nn0_id.1.4.bias 1024 0 |
|
module.nn0_reg.0.0.weight 17408 0 |
|
module.nn0_reg.0.0.bias 1024 0 |
|
module.nn0_reg.0.2.weight 1024 0 |
|
module.nn0_reg.0.2.bias 1024 0 |
|
module.nn0_reg.0.4.weight 1048576 0 |
|
module.nn0_reg.0.4.bias 1024 0 |
|
module.nn0_reg.1.0.weight 17408 0 |
|
module.nn0_reg.1.0.bias 1024 0 |
|
module.nn0_reg.1.2.weight 1024 0 |
|
module.nn0_reg.1.2.bias 1024 0 |
|
module.nn0_reg.1.4.weight 1048576 0 |
|
module.nn0_reg.1.4.bias 1024 0 |
|
module.conv_id.0.mha.in_proj_weight 3145728 0 |
|
module.conv_id.0.mha.in_proj_bias 3072 0 |
|
module.conv_id.0.mha.out_proj.weight 1048576 0 |
|
module.conv_id.0.mha.out_proj.bias 1024 0 |
|
module.conv_id.0.norm0.weight 1024 0 |
|
module.conv_id.0.norm0.bias 1024 0 |
|
module.conv_id.0.norm1.weight 1024 0 |
|
module.conv_id.0.norm1.bias 1024 0 |
|
module.conv_id.0.seq.0.weight 1048576 0 |
|
module.conv_id.0.seq.0.bias 1024 0 |
|
module.conv_id.0.seq.2.weight 1048576 0 |
|
module.conv_id.0.seq.2.bias 1024 0 |
|
module.conv_id.1.mha.in_proj_weight 3145728 0 |
|
module.conv_id.1.mha.in_proj_bias 3072 0 |
|
module.conv_id.1.mha.out_proj.weight 1048576 0 |
|
module.conv_id.1.mha.out_proj.bias 1024 0 |
|
module.conv_id.1.norm0.weight 1024 0 |
|
module.conv_id.1.norm0.bias 1024 0 |
|
module.conv_id.1.norm1.weight 1024 0 |
|
module.conv_id.1.norm1.bias 1024 0 |
|
module.conv_id.1.seq.0.weight 1048576 0 |
|
module.conv_id.1.seq.0.bias 1024 0 |
|
module.conv_id.1.seq.2.weight 1048576 0 |
|
module.conv_id.1.seq.2.bias 1024 0 |
|
module.conv_id.2.mha.in_proj_weight 3145728 0 |
|
module.conv_id.2.mha.in_proj_bias 3072 0 |
|
module.conv_id.2.mha.out_proj.weight 1048576 0 |
|
module.conv_id.2.mha.out_proj.bias 1024 0 |
|
module.conv_id.2.norm0.weight 1024 0 |
|
module.conv_id.2.norm0.bias 1024 0 |
|
module.conv_id.2.norm1.weight 1024 0 |
|
module.conv_id.2.norm1.bias 1024 0 |
|
module.conv_id.2.seq.0.weight 1048576 0 |
|
module.conv_id.2.seq.0.bias 1024 0 |
|
module.conv_id.2.seq.2.weight 1048576 0 |
|
module.conv_id.2.seq.2.bias 1024 0 |
|
module.conv_id.3.mha.in_proj_weight 3145728 0 |
|
module.conv_id.3.mha.in_proj_bias 3072 0 |
|
module.conv_id.3.mha.out_proj.weight 1048576 0 |
|
module.conv_id.3.mha.out_proj.bias 1024 0 |
|
module.conv_id.3.norm0.weight 1024 0 |
|
module.conv_id.3.norm0.bias 1024 0 |
|
module.conv_id.3.norm1.weight 1024 0 |
|
module.conv_id.3.norm1.bias 1024 0 |
|
module.conv_id.3.seq.0.weight 1048576 0 |
|
module.conv_id.3.seq.0.bias 1024 0 |
|
module.conv_id.3.seq.2.weight 1048576 0 |
|
module.conv_id.3.seq.2.bias 1024 0 |
|
module.conv_id.4.mha.in_proj_weight 3145728 0 |
|
module.conv_id.4.mha.in_proj_bias 3072 0 |
|
module.conv_id.4.mha.out_proj.weight 1048576 0 |
|
module.conv_id.4.mha.out_proj.bias 1024 0 |
|
module.conv_id.4.norm0.weight 1024 0 |
|
module.conv_id.4.norm0.bias 1024 0 |
|
module.conv_id.4.norm1.weight 1024 0 |
|
module.conv_id.4.norm1.bias 1024 0 |
|
module.conv_id.4.seq.0.weight 1048576 0 |
|
module.conv_id.4.seq.0.bias 1024 0 |
|
module.conv_id.4.seq.2.weight 1048576 0 |
|
module.conv_id.4.seq.2.bias 1024 0 |
|
module.conv_id.5.mha.in_proj_weight 3145728 0 |
|
module.conv_id.5.mha.in_proj_bias 3072 0 |
|
module.conv_id.5.mha.out_proj.weight 1048576 0 |
|
module.conv_id.5.mha.out_proj.bias 1024 0 |
|
module.conv_id.5.norm0.weight 1024 0 |
|
module.conv_id.5.norm0.bias 1024 0 |
|
module.conv_id.5.norm1.weight 1024 0 |
|
module.conv_id.5.norm1.bias 1024 0 |
|
module.conv_id.5.seq.0.weight 1048576 0 |
|
module.conv_id.5.seq.0.bias 1024 0 |
|
module.conv_id.5.seq.2.weight 1048576 0 |
|
module.conv_id.5.seq.2.bias 1024 0 |
|
module.conv_reg.0.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.0.mha.in_proj_bias 3072 0 |
|
module.conv_reg.0.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.0.mha.out_proj.bias 1024 0 |
|
module.conv_reg.0.norm0.weight 1024 0 |
|
module.conv_reg.0.norm0.bias 1024 0 |
|
module.conv_reg.0.norm1.weight 1024 0 |
|
module.conv_reg.0.norm1.bias 1024 0 |
|
module.conv_reg.0.seq.0.weight 1048576 0 |
|
module.conv_reg.0.seq.0.bias 1024 0 |
|
module.conv_reg.0.seq.2.weight 1048576 0 |
|
module.conv_reg.0.seq.2.bias 1024 0 |
|
module.conv_reg.1.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.1.mha.in_proj_bias 3072 0 |
|
module.conv_reg.1.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.1.mha.out_proj.bias 1024 0 |
|
module.conv_reg.1.norm0.weight 1024 0 |
|
module.conv_reg.1.norm0.bias 1024 0 |
|
module.conv_reg.1.norm1.weight 1024 0 |
|
module.conv_reg.1.norm1.bias 1024 0 |
|
module.conv_reg.1.seq.0.weight 1048576 0 |
|
module.conv_reg.1.seq.0.bias 1024 0 |
|
module.conv_reg.1.seq.2.weight 1048576 0 |
|
module.conv_reg.1.seq.2.bias 1024 0 |
|
module.conv_reg.2.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.2.mha.in_proj_bias 3072 0 |
|
module.conv_reg.2.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.2.mha.out_proj.bias 1024 0 |
|
module.conv_reg.2.norm0.weight 1024 0 |
|
module.conv_reg.2.norm0.bias 1024 0 |
|
module.conv_reg.2.norm1.weight 1024 0 |
|
module.conv_reg.2.norm1.bias 1024 0 |
|
module.conv_reg.2.seq.0.weight 1048576 0 |
|
module.conv_reg.2.seq.0.bias 1024 0 |
|
module.conv_reg.2.seq.2.weight 1048576 0 |
|
module.conv_reg.2.seq.2.bias 1024 0 |
|
module.conv_reg.3.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.3.mha.in_proj_bias 3072 0 |
|
module.conv_reg.3.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.3.mha.out_proj.bias 1024 0 |
|
module.conv_reg.3.norm0.weight 1024 0 |
|
module.conv_reg.3.norm0.bias 1024 0 |
|
module.conv_reg.3.norm1.weight 1024 0 |
|
module.conv_reg.3.norm1.bias 1024 0 |
|
module.conv_reg.3.seq.0.weight 1048576 0 |
|
module.conv_reg.3.seq.0.bias 1024 0 |
|
module.conv_reg.3.seq.2.weight 1048576 0 |
|
module.conv_reg.3.seq.2.bias 1024 0 |
|
module.conv_reg.4.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.4.mha.in_proj_bias 3072 0 |
|
module.conv_reg.4.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.4.mha.out_proj.bias 1024 0 |
|
module.conv_reg.4.norm0.weight 1024 0 |
|
module.conv_reg.4.norm0.bias 1024 0 |
|
module.conv_reg.4.norm1.weight 1024 0 |
|
module.conv_reg.4.norm1.bias 1024 0 |
|
module.conv_reg.4.seq.0.weight 1048576 0 |
|
module.conv_reg.4.seq.0.bias 1024 0 |
|
module.conv_reg.4.seq.2.weight 1048576 0 |
|
module.conv_reg.4.seq.2.bias 1024 0 |
|
module.conv_reg.5.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.5.mha.in_proj_bias 3072 0 |
|
module.conv_reg.5.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.5.mha.out_proj.bias 1024 0 |
|
module.conv_reg.5.norm0.weight 1024 0 |
|
module.conv_reg.5.norm0.bias 1024 0 |
|
module.conv_reg.5.norm1.weight 1024 0 |
|
module.conv_reg.5.norm1.bias 1024 0 |
|
module.conv_reg.5.seq.0.weight 1048576 0 |
|
module.conv_reg.5.seq.0.bias 1024 0 |
|
module.conv_reg.5.seq.2.weight 1048576 0 |
|
module.conv_reg.5.seq.2.bias 1024 0 |
|
module.nn_binary_particle.0.weight 1048576 0 |
|
module.nn_binary_particle.0.bias 1024 0 |
|
module.nn_binary_particle.2.weight 1024 0 |
|
module.nn_binary_particle.2.bias 1024 0 |
|
module.nn_binary_particle.4.weight 2048 0 |
|
module.nn_binary_particle.4.bias 2 0 |
|
module.nn_pid.0.weight 1048576 0 |
|
module.nn_pid.0.bias 1024 0 |
|
module.nn_pid.2.weight 1024 0 |
|
module.nn_pid.2.bias 1024 0 |
|
module.nn_pid.4.weight 6144 0 |
|
module.nn_pid.4.bias 6 0 |
|
module.nn_pt.nn.0.0.weight 1048576 0 |
|
module.nn_pt.nn.0.0.bias 1024 0 |
|
module.nn_pt.nn.0.2.weight 1024 0 |
|
module.nn_pt.nn.0.2.bias 1024 0 |
|
module.nn_pt.nn.0.4.weight 1024 0 |
|
module.nn_pt.nn.0.4.bias 1 0 |
|
module.nn_pt.nn.1.0.weight 1048576 0 |
|
module.nn_pt.nn.1.0.bias 1024 0 |
|
module.nn_pt.nn.1.2.weight 1024 0 |
|
module.nn_pt.nn.1.2.bias 1024 0 |
|
module.nn_pt.nn.1.4.weight 1024 0 |
|
module.nn_pt.nn.1.4.bias 1 0 |
|
module.nn_eta.nn.0.weight 1048576 0 |
|
module.nn_eta.nn.0.bias 1024 0 |
|
module.nn_eta.nn.2.weight 1024 0 |
|
module.nn_eta.nn.2.bias 1024 0 |
|
module.nn_eta.nn.4.weight 2048 0 |
|
module.nn_eta.nn.4.bias 2 0 |
|
module.nn_sin_phi.nn.0.weight 1048576 0 |
|
module.nn_sin_phi.nn.0.bias 1024 0 |
|
module.nn_sin_phi.nn.2.weight 1024 0 |
|
module.nn_sin_phi.nn.2.bias 1024 0 |
|
module.nn_sin_phi.nn.4.weight 2048 0 |
|
module.nn_sin_phi.nn.4.bias 2 0 |
|
module.nn_cos_phi.nn.0.weight 1048576 0 |
|
module.nn_cos_phi.nn.0.bias 1024 0 |
|
module.nn_cos_phi.nn.2.weight 1024 0 |
|
module.nn_cos_phi.nn.2.bias 1024 0 |
|
module.nn_cos_phi.nn.4.weight 2048 0 |
|
module.nn_cos_phi.nn.4.bias 2 0 |
|
module.nn_energy.nn.0.0.weight 1048576 0 |
|
module.nn_energy.nn.0.0.bias 1024 0 |
|
module.nn_energy.nn.0.2.weight 1024 0 |
|
module.nn_energy.nn.0.2.bias 1024 0 |
|
module.nn_energy.nn.0.4.weight 1024 0 |
|
module.nn_energy.nn.0.4.bias 1 0 |
|
module.nn_energy.nn.1.0.weight 1048576 0 |
|
module.nn_energy.nn.1.0.bias 1024 0 |
|
module.nn_energy.nn.1.2.weight 1024 0 |
|
module.nn_energy.nn.1.2.bias 1024 0 |
|
module.nn_energy.nn.1.4.weight 1024 0 |
|
module.nn_energy.nn.1.4.bias 1 0 |
|
module.final_norm_id.weight 1024 0 |
|
module.final_norm_id.bias 1024 0 |
|
module.final_norm_reg.weight 1024 0 |
|
module.final_norm_reg.bias 1024 0 |
|
[2024-11-06 10:44:42,262] INFO: Creating experiment dir experiments/pyg-clic_20241106_104416_929167 |
|
[2024-11-06 10:44:42,262] INFO: [1mModel directory experiments/pyg-clic_20241106_104416_929167[0m |
|
[2024-11-06 10:44:42,466] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719492[0m |
|
[2024-11-06 10:44:42,577] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719490[0m |
|
[2024-11-06 10:44:42,628] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719489[0m |
|
[2024-11-06 10:44:42,674] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719515[0m |
|
[2024-11-06 10:44:42,731] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719510[0m |
|
[2024-11-06 10:44:42,796] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719503[0m |
|
[2024-11-06 10:44:42,856] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719509[0m |
|
[2024-11-06 10:44:42,914] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719484[0m |
|
[2024-11-06 10:44:43,005] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719474[0m |
|
[2024-11-06 10:44:43,067] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 720386[0m |
|
[2024-11-06 10:44:43,118] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-06 10:44:43,164] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-06 10:44:43,229] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-06 10:44:43,287] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-06 10:44:43,358] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-06 10:44:43,421] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-06 10:44:43,506] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-06 10:44:43,609] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-06 10:44:43,741] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-06 10:44:43,781] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 714700[0m |
|
[2024-11-06 10:44:43,828] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-06 10:44:43,882] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-06 10:44:43,946] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-06 10:44:44,030] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-06 10:44:44,070] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-06 10:44:44,117] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-06 10:44:44,163] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-06 10:44:44,217] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-06 10:44:44,274] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-06 10:44:44,322] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720700[0m |
|
[2024-11-06 10:45:16,053] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79948[0m |
|
[2024-11-06 10:45:16,065] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m |
|
[2024-11-06 10:45:16,079] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79939[0m |
|
[2024-11-06 10:45:16,093] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79939[0m |
|
[2024-11-06 10:45:16,105] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m |
|
[2024-11-06 10:45:16,118] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m |
|
[2024-11-06 10:45:16,129] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79938[0m |
|
[2024-11-06 10:45:16,137] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79957[0m |
|
[2024-11-06 10:45:16,145] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79955[0m |
|
[2024-11-06 10:45:16,153] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 80035[0m |
|
[2024-11-06 10:45:16,162] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-06 10:45:16,170] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-06 10:45:16,178] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-06 10:45:16,186] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-06 10:45:16,271] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-06 10:45:16,281] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-06 10:45:16,290] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-06 10:45:16,299] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-06 10:45:16,307] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-06 10:45:16,315] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79700[0m |
|
[2024-11-06 10:45:16,322] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-06 10:45:16,330] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-06 10:45:16,337] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-06 10:45:16,344] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-06 10:45:16,351] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-06 10:45:16,357] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-06 10:45:16,364] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-06 10:45:16,371] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-06 10:45:16,380] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-06 10:45:16,389] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80100[0m |
|
[2024-11-06 10:45:53,742] INFO: [0;31mInitiating epoch #1 train run on device rank=0[0m |
|
[2024-11-06 15:25:43,525] INFO: [0;31mInitiating epoch #1 valid run on device rank=0[0m |
|
[2024-11-06 15:38:40,218] INFO: [1mRank 0: epoch=1 / 50 train_loss=3.2695 valid_loss=2.7055 stale=0 epoch_train_time=279.83m epoch_valid_time=12.83m epoch_total_time=292.77m eta=14346.0m[0m |
|
[2024-11-06 15:38:40,243] INFO: [0;31mInitiating epoch #2 train run on device rank=0[0m |
|
[2024-11-06 20:18:41,106] INFO: [0;31mInitiating epoch #2 valid run on device rank=0[0m |
|
[2024-11-06 20:29:01,620] INFO: [1mRank 0: epoch=2 / 50 train_loss=2.5058 valid_loss=2.4314 stale=0 epoch_train_time=280.01m epoch_valid_time=10.23m epoch_total_time=290.36m eta=13995.2m[0m |
|
[2024-11-06 20:29:01,756] INFO: [0;31mInitiating epoch #3 train run on device rank=0[0m |
|
[2024-11-07 01:07:21,400] INFO: [0;31mInitiating epoch #3 valid run on device rank=0[0m |
|
[2024-11-07 01:18:03,157] INFO: [1mRank 0: epoch=3 / 50 train_loss=2.3264 valid_loss=2.2988 stale=0 epoch_train_time=278.33m epoch_valid_time=10.6m epoch_total_time=289.02m eta=13663.8m[0m |
|
[2024-11-07 01:18:03,266] INFO: [0;31mInitiating epoch #4 train run on device rank=0[0m |
|
[2024-11-07 05:56:28,121] INFO: [0;31mInitiating epoch #4 valid run on device rank=0[0m |
|
[2024-11-07 06:07:09,744] INFO: [1mRank 0: epoch=4 / 50 train_loss=2.2179 valid_loss=2.2087 stale=0 epoch_train_time=278.41m epoch_valid_time=10.57m epoch_total_time=289.11m eta=13354.6m[0m |
|
[2024-11-07 06:07:09,810] INFO: [0;31mInitiating epoch #5 train run on device rank=0[0m |
|
[2024-11-07 11:43:12,868] INFO: [0;35mWill use torch.nn.parallel.DistributedDataParallel() and 8 gpus[0m |
|
[2024-11-07 11:43:12,882] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-07 11:43:12,882] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-07 11:43:12,882] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-07 11:43:12,882] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-07 11:43:12,882] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-07 11:43:12,882] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-07 11:43:12,883] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-07 11:43:12,883] INFO: [0;35mAMD Radeon Graphics[0m |
|
[2024-11-07 11:43:17,269] INFO: configured dtype=torch.bfloat16 for autocast |
|
[2024-11-07 11:43:18,951] INFO: model_kwargs: {'input_dim': 17, 'num_classes': 6, 'input_encoding': 'split', 'pt_mode': 'direct-elemtype-split', 'eta_mode': 'linear', 'sin_phi_mode': 'linear', 'cos_phi_mode': 'linear', 'energy_mode': 'direct-elemtype-split', 'elemtypes_nonzero': [1, 2], 'learned_representation_mode': 'last', 'conv_type': 'attention', 'num_convs': 6, 'dropout_ff': 0.1, 'dropout_conv_id_mha': 0.0, 'dropout_conv_id_ff': 0.0, 'dropout_conv_reg_mha': 0.1, 'dropout_conv_reg_ff': 0.1, 'activation': 'relu', 'head_dim': 32, 'num_heads': 32, 'attention_type': 'math', 'use_pre_layernorm': True} |
|
[2024-11-07 11:43:19,071] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,106] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,142] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,177] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,212] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,248] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,283] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,319] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,354] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,390] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,425] INFO: using attention_type=math |
|
[2024-11-07 11:43:19,461] INFO: using attention_type=math |
|
[2024-11-07 11:43:39,383] INFO: [1mLoaded model weights from experiments/pyg-clic_20241106_104416_929167/checkpoints/checkpoint-04-2.208726.pth[0m |
|
[2024-11-07 11:43:41,253] INFO: DistributedDataParallel( |
|
(module): MLPF( |
|
(nn0_id): ModuleList( |
|
(0-1): 2 x Sequential( |
|
(0): Linear(in_features=17, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=1024, bias=True) |
|
) |
|
) |
|
(nn0_reg): ModuleList( |
|
(0-1): 2 x Sequential( |
|
(0): Linear(in_features=17, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=1024, bias=True) |
|
) |
|
) |
|
(conv_id): ModuleList( |
|
(0-5): 6 x PreLnSelfAttentionLayer( |
|
(mha): MultiheadAttention( |
|
(out_proj): NonDynamicallyQuantizableLinear(in_features=1024, out_features=1024, bias=True) |
|
) |
|
(norm0): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(seq): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): Linear(in_features=1024, out_features=1024, bias=True) |
|
(3): ReLU() |
|
) |
|
(dropout): Dropout(p=0.0, inplace=False) |
|
) |
|
) |
|
(conv_reg): ModuleList( |
|
(0-5): 6 x PreLnSelfAttentionLayer( |
|
(mha): MultiheadAttention( |
|
(out_proj): NonDynamicallyQuantizableLinear(in_features=1024, out_features=1024, bias=True) |
|
) |
|
(norm0): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(seq): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): Linear(in_features=1024, out_features=1024, bias=True) |
|
(3): ReLU() |
|
) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(nn_binary_particle): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=2, bias=True) |
|
) |
|
(nn_pid): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=6, bias=True) |
|
) |
|
(nn_pt): RegressionOutput( |
|
(nn): ModuleList( |
|
(0-1): 2 x Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=1, bias=True) |
|
) |
|
) |
|
) |
|
(nn_eta): RegressionOutput( |
|
(nn): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=2, bias=True) |
|
) |
|
) |
|
(nn_sin_phi): RegressionOutput( |
|
(nn): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=2, bias=True) |
|
) |
|
) |
|
(nn_cos_phi): RegressionOutput( |
|
(nn): Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=2, bias=True) |
|
) |
|
) |
|
(nn_energy): RegressionOutput( |
|
(nn): ModuleList( |
|
(0-1): 2 x Sequential( |
|
(0): Linear(in_features=1024, out_features=1024, bias=True) |
|
(1): ReLU() |
|
(2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(3): Dropout(p=0.1, inplace=False) |
|
(4): Linear(in_features=1024, out_features=1, bias=True) |
|
) |
|
) |
|
) |
|
(final_norm_id): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
(final_norm_reg): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) |
|
) |
|
) |
|
[2024-11-07 11:43:41,254] INFO: Trainable parameters: 89388050 |
|
[2024-11-07 11:43:41,255] INFO: Non-trainable parameters: 0 |
|
[2024-11-07 11:43:41,255] INFO: Total parameters: 89388050 |
|
[2024-11-07 11:43:41,259] INFO: Modules Trainable parameters Non-trainable parameters |
|
module.nn0_id.0.0.weight 17408 0 |
|
module.nn0_id.0.0.bias 1024 0 |
|
module.nn0_id.0.2.weight 1024 0 |
|
module.nn0_id.0.2.bias 1024 0 |
|
module.nn0_id.0.4.weight 1048576 0 |
|
module.nn0_id.0.4.bias 1024 0 |
|
module.nn0_id.1.0.weight 17408 0 |
|
module.nn0_id.1.0.bias 1024 0 |
|
module.nn0_id.1.2.weight 1024 0 |
|
module.nn0_id.1.2.bias 1024 0 |
|
module.nn0_id.1.4.weight 1048576 0 |
|
module.nn0_id.1.4.bias 1024 0 |
|
module.nn0_reg.0.0.weight 17408 0 |
|
module.nn0_reg.0.0.bias 1024 0 |
|
module.nn0_reg.0.2.weight 1024 0 |
|
module.nn0_reg.0.2.bias 1024 0 |
|
module.nn0_reg.0.4.weight 1048576 0 |
|
module.nn0_reg.0.4.bias 1024 0 |
|
module.nn0_reg.1.0.weight 17408 0 |
|
module.nn0_reg.1.0.bias 1024 0 |
|
module.nn0_reg.1.2.weight 1024 0 |
|
module.nn0_reg.1.2.bias 1024 0 |
|
module.nn0_reg.1.4.weight 1048576 0 |
|
module.nn0_reg.1.4.bias 1024 0 |
|
module.conv_id.0.mha.in_proj_weight 3145728 0 |
|
module.conv_id.0.mha.in_proj_bias 3072 0 |
|
module.conv_id.0.mha.out_proj.weight 1048576 0 |
|
module.conv_id.0.mha.out_proj.bias 1024 0 |
|
module.conv_id.0.norm0.weight 1024 0 |
|
module.conv_id.0.norm0.bias 1024 0 |
|
module.conv_id.0.norm1.weight 1024 0 |
|
module.conv_id.0.norm1.bias 1024 0 |
|
module.conv_id.0.seq.0.weight 1048576 0 |
|
module.conv_id.0.seq.0.bias 1024 0 |
|
module.conv_id.0.seq.2.weight 1048576 0 |
|
module.conv_id.0.seq.2.bias 1024 0 |
|
module.conv_id.1.mha.in_proj_weight 3145728 0 |
|
module.conv_id.1.mha.in_proj_bias 3072 0 |
|
module.conv_id.1.mha.out_proj.weight 1048576 0 |
|
module.conv_id.1.mha.out_proj.bias 1024 0 |
|
module.conv_id.1.norm0.weight 1024 0 |
|
module.conv_id.1.norm0.bias 1024 0 |
|
module.conv_id.1.norm1.weight 1024 0 |
|
module.conv_id.1.norm1.bias 1024 0 |
|
module.conv_id.1.seq.0.weight 1048576 0 |
|
module.conv_id.1.seq.0.bias 1024 0 |
|
module.conv_id.1.seq.2.weight 1048576 0 |
|
module.conv_id.1.seq.2.bias 1024 0 |
|
module.conv_id.2.mha.in_proj_weight 3145728 0 |
|
module.conv_id.2.mha.in_proj_bias 3072 0 |
|
module.conv_id.2.mha.out_proj.weight 1048576 0 |
|
module.conv_id.2.mha.out_proj.bias 1024 0 |
|
module.conv_id.2.norm0.weight 1024 0 |
|
module.conv_id.2.norm0.bias 1024 0 |
|
module.conv_id.2.norm1.weight 1024 0 |
|
module.conv_id.2.norm1.bias 1024 0 |
|
module.conv_id.2.seq.0.weight 1048576 0 |
|
module.conv_id.2.seq.0.bias 1024 0 |
|
module.conv_id.2.seq.2.weight 1048576 0 |
|
module.conv_id.2.seq.2.bias 1024 0 |
|
module.conv_id.3.mha.in_proj_weight 3145728 0 |
|
module.conv_id.3.mha.in_proj_bias 3072 0 |
|
module.conv_id.3.mha.out_proj.weight 1048576 0 |
|
module.conv_id.3.mha.out_proj.bias 1024 0 |
|
module.conv_id.3.norm0.weight 1024 0 |
|
module.conv_id.3.norm0.bias 1024 0 |
|
module.conv_id.3.norm1.weight 1024 0 |
|
module.conv_id.3.norm1.bias 1024 0 |
|
module.conv_id.3.seq.0.weight 1048576 0 |
|
module.conv_id.3.seq.0.bias 1024 0 |
|
module.conv_id.3.seq.2.weight 1048576 0 |
|
module.conv_id.3.seq.2.bias 1024 0 |
|
module.conv_id.4.mha.in_proj_weight 3145728 0 |
|
module.conv_id.4.mha.in_proj_bias 3072 0 |
|
module.conv_id.4.mha.out_proj.weight 1048576 0 |
|
module.conv_id.4.mha.out_proj.bias 1024 0 |
|
module.conv_id.4.norm0.weight 1024 0 |
|
module.conv_id.4.norm0.bias 1024 0 |
|
module.conv_id.4.norm1.weight 1024 0 |
|
module.conv_id.4.norm1.bias 1024 0 |
|
module.conv_id.4.seq.0.weight 1048576 0 |
|
module.conv_id.4.seq.0.bias 1024 0 |
|
module.conv_id.4.seq.2.weight 1048576 0 |
|
module.conv_id.4.seq.2.bias 1024 0 |
|
module.conv_id.5.mha.in_proj_weight 3145728 0 |
|
module.conv_id.5.mha.in_proj_bias 3072 0 |
|
module.conv_id.5.mha.out_proj.weight 1048576 0 |
|
module.conv_id.5.mha.out_proj.bias 1024 0 |
|
module.conv_id.5.norm0.weight 1024 0 |
|
module.conv_id.5.norm0.bias 1024 0 |
|
module.conv_id.5.norm1.weight 1024 0 |
|
module.conv_id.5.norm1.bias 1024 0 |
|
module.conv_id.5.seq.0.weight 1048576 0 |
|
module.conv_id.5.seq.0.bias 1024 0 |
|
module.conv_id.5.seq.2.weight 1048576 0 |
|
module.conv_id.5.seq.2.bias 1024 0 |
|
module.conv_reg.0.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.0.mha.in_proj_bias 3072 0 |
|
module.conv_reg.0.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.0.mha.out_proj.bias 1024 0 |
|
module.conv_reg.0.norm0.weight 1024 0 |
|
module.conv_reg.0.norm0.bias 1024 0 |
|
module.conv_reg.0.norm1.weight 1024 0 |
|
module.conv_reg.0.norm1.bias 1024 0 |
|
module.conv_reg.0.seq.0.weight 1048576 0 |
|
module.conv_reg.0.seq.0.bias 1024 0 |
|
module.conv_reg.0.seq.2.weight 1048576 0 |
|
module.conv_reg.0.seq.2.bias 1024 0 |
|
module.conv_reg.1.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.1.mha.in_proj_bias 3072 0 |
|
module.conv_reg.1.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.1.mha.out_proj.bias 1024 0 |
|
module.conv_reg.1.norm0.weight 1024 0 |
|
module.conv_reg.1.norm0.bias 1024 0 |
|
module.conv_reg.1.norm1.weight 1024 0 |
|
module.conv_reg.1.norm1.bias 1024 0 |
|
module.conv_reg.1.seq.0.weight 1048576 0 |
|
module.conv_reg.1.seq.0.bias 1024 0 |
|
module.conv_reg.1.seq.2.weight 1048576 0 |
|
module.conv_reg.1.seq.2.bias 1024 0 |
|
module.conv_reg.2.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.2.mha.in_proj_bias 3072 0 |
|
module.conv_reg.2.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.2.mha.out_proj.bias 1024 0 |
|
module.conv_reg.2.norm0.weight 1024 0 |
|
module.conv_reg.2.norm0.bias 1024 0 |
|
module.conv_reg.2.norm1.weight 1024 0 |
|
module.conv_reg.2.norm1.bias 1024 0 |
|
module.conv_reg.2.seq.0.weight 1048576 0 |
|
module.conv_reg.2.seq.0.bias 1024 0 |
|
module.conv_reg.2.seq.2.weight 1048576 0 |
|
module.conv_reg.2.seq.2.bias 1024 0 |
|
module.conv_reg.3.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.3.mha.in_proj_bias 3072 0 |
|
module.conv_reg.3.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.3.mha.out_proj.bias 1024 0 |
|
module.conv_reg.3.norm0.weight 1024 0 |
|
module.conv_reg.3.norm0.bias 1024 0 |
|
module.conv_reg.3.norm1.weight 1024 0 |
|
module.conv_reg.3.norm1.bias 1024 0 |
|
module.conv_reg.3.seq.0.weight 1048576 0 |
|
module.conv_reg.3.seq.0.bias 1024 0 |
|
module.conv_reg.3.seq.2.weight 1048576 0 |
|
module.conv_reg.3.seq.2.bias 1024 0 |
|
module.conv_reg.4.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.4.mha.in_proj_bias 3072 0 |
|
module.conv_reg.4.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.4.mha.out_proj.bias 1024 0 |
|
module.conv_reg.4.norm0.weight 1024 0 |
|
module.conv_reg.4.norm0.bias 1024 0 |
|
module.conv_reg.4.norm1.weight 1024 0 |
|
module.conv_reg.4.norm1.bias 1024 0 |
|
module.conv_reg.4.seq.0.weight 1048576 0 |
|
module.conv_reg.4.seq.0.bias 1024 0 |
|
module.conv_reg.4.seq.2.weight 1048576 0 |
|
module.conv_reg.4.seq.2.bias 1024 0 |
|
module.conv_reg.5.mha.in_proj_weight 3145728 0 |
|
module.conv_reg.5.mha.in_proj_bias 3072 0 |
|
module.conv_reg.5.mha.out_proj.weight 1048576 0 |
|
module.conv_reg.5.mha.out_proj.bias 1024 0 |
|
module.conv_reg.5.norm0.weight 1024 0 |
|
module.conv_reg.5.norm0.bias 1024 0 |
|
module.conv_reg.5.norm1.weight 1024 0 |
|
module.conv_reg.5.norm1.bias 1024 0 |
|
module.conv_reg.5.seq.0.weight 1048576 0 |
|
module.conv_reg.5.seq.0.bias 1024 0 |
|
module.conv_reg.5.seq.2.weight 1048576 0 |
|
module.conv_reg.5.seq.2.bias 1024 0 |
|
module.nn_binary_particle.0.weight 1048576 0 |
|
module.nn_binary_particle.0.bias 1024 0 |
|
module.nn_binary_particle.2.weight 1024 0 |
|
module.nn_binary_particle.2.bias 1024 0 |
|
module.nn_binary_particle.4.weight 2048 0 |
|
module.nn_binary_particle.4.bias 2 0 |
|
module.nn_pid.0.weight 1048576 0 |
|
module.nn_pid.0.bias 1024 0 |
|
module.nn_pid.2.weight 1024 0 |
|
module.nn_pid.2.bias 1024 0 |
|
module.nn_pid.4.weight 6144 0 |
|
module.nn_pid.4.bias 6 0 |
|
module.nn_pt.nn.0.0.weight 1048576 0 |
|
module.nn_pt.nn.0.0.bias 1024 0 |
|
module.nn_pt.nn.0.2.weight 1024 0 |
|
module.nn_pt.nn.0.2.bias 1024 0 |
|
module.nn_pt.nn.0.4.weight 1024 0 |
|
module.nn_pt.nn.0.4.bias 1 0 |
|
module.nn_pt.nn.1.0.weight 1048576 0 |
|
module.nn_pt.nn.1.0.bias 1024 0 |
|
module.nn_pt.nn.1.2.weight 1024 0 |
|
module.nn_pt.nn.1.2.bias 1024 0 |
|
module.nn_pt.nn.1.4.weight 1024 0 |
|
module.nn_pt.nn.1.4.bias 1 0 |
|
module.nn_eta.nn.0.weight 1048576 0 |
|
module.nn_eta.nn.0.bias 1024 0 |
|
module.nn_eta.nn.2.weight 1024 0 |
|
module.nn_eta.nn.2.bias 1024 0 |
|
module.nn_eta.nn.4.weight 2048 0 |
|
module.nn_eta.nn.4.bias 2 0 |
|
module.nn_sin_phi.nn.0.weight 1048576 0 |
|
module.nn_sin_phi.nn.0.bias 1024 0 |
|
module.nn_sin_phi.nn.2.weight 1024 0 |
|
module.nn_sin_phi.nn.2.bias 1024 0 |
|
module.nn_sin_phi.nn.4.weight 2048 0 |
|
module.nn_sin_phi.nn.4.bias 2 0 |
|
module.nn_cos_phi.nn.0.weight 1048576 0 |
|
module.nn_cos_phi.nn.0.bias 1024 0 |
|
module.nn_cos_phi.nn.2.weight 1024 0 |
|
module.nn_cos_phi.nn.2.bias 1024 0 |
|
module.nn_cos_phi.nn.4.weight 2048 0 |
|
module.nn_cos_phi.nn.4.bias 2 0 |
|
module.nn_energy.nn.0.0.weight 1048576 0 |
|
module.nn_energy.nn.0.0.bias 1024 0 |
|
module.nn_energy.nn.0.2.weight 1024 0 |
|
module.nn_energy.nn.0.2.bias 1024 0 |
|
module.nn_energy.nn.0.4.weight 1024 0 |
|
module.nn_energy.nn.0.4.bias 1 0 |
|
module.nn_energy.nn.1.0.weight 1048576 0 |
|
module.nn_energy.nn.1.0.bias 1024 0 |
|
module.nn_energy.nn.1.2.weight 1024 0 |
|
module.nn_energy.nn.1.2.bias 1024 0 |
|
module.nn_energy.nn.1.4.weight 1024 0 |
|
module.nn_energy.nn.1.4.bias 1 0 |
|
module.final_norm_id.weight 1024 0 |
|
module.final_norm_id.bias 1024 0 |
|
module.final_norm_reg.weight 1024 0 |
|
module.final_norm_reg.bias 1024 0 |
|
[2024-11-07 11:43:41,267] INFO: Creating experiment dir experiments/pyg-clic_20241106_104416_929167 |
|
[2024-11-07 11:43:41,267] INFO: [1mModel directory experiments/pyg-clic_20241106_104416_929167[0m |
|
[2024-11-07 11:43:41,494] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719492[0m |
|
[2024-11-07 11:43:41,603] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719490[0m |
|
[2024-11-07 11:43:41,656] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719489[0m |
|
[2024-11-07 11:43:41,714] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719515[0m |
|
[2024-11-07 11:43:41,756] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719510[0m |
|
[2024-11-07 11:43:41,814] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719503[0m |
|
[2024-11-07 11:43:41,863] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719509[0m |
|
[2024-11-07 11:43:42,131] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719484[0m |
|
[2024-11-07 11:43:42,217] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 719474[0m |
|
[2024-11-07 11:43:42,310] INFO: [0;34mtrain_dataset: clic_edm_qq_pf, 720386[0m |
|
[2024-11-07 11:43:42,596] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-07 11:43:42,707] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-07 11:43:42,778] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-07 11:43:42,825] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-07 11:43:42,918] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-07 11:43:43,016] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-07 11:43:43,150] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-07 11:43:43,215] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-07 11:43:43,310] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 713900[0m |
|
[2024-11-07 11:43:43,491] INFO: [0;34mtrain_dataset: clic_edm_ttbar_pf, 714700[0m |
|
[2024-11-07 11:43:43,543] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-07 11:43:43,667] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-07 11:43:43,849] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-07 11:43:43,890] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-07 11:43:44,720] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-07 11:43:44,912] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-07 11:43:45,203] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-07 11:43:45,262] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-07 11:43:45,329] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720000[0m |
|
[2024-11-07 11:43:45,463] INFO: [0;34mtrain_dataset: clic_edm_ww_fullhad_pf, 720700[0m |
|
[2024-11-07 11:44:20,292] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79948[0m |
|
[2024-11-07 11:44:20,300] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m |
|
[2024-11-07 11:44:20,306] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79939[0m |
|
[2024-11-07 11:44:20,313] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79939[0m |
|
[2024-11-07 11:44:20,319] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m |
|
[2024-11-07 11:44:20,326] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79950[0m |
|
[2024-11-07 11:44:20,332] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79938[0m |
|
[2024-11-07 11:44:20,339] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79957[0m |
|
[2024-11-07 11:44:20,345] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 79955[0m |
|
[2024-11-07 11:44:20,352] INFO: [0;34mvalid_dataset: clic_edm_qq_pf, 80035[0m |
|
[2024-11-07 11:44:20,360] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-07 11:44:20,368] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-07 11:44:20,377] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-07 11:44:20,386] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-07 11:44:20,394] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-07 11:44:20,402] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-07 11:44:20,411] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-07 11:44:20,419] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-07 11:44:20,426] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79300[0m |
|
[2024-11-07 11:44:20,473] INFO: [0;34mvalid_dataset: clic_edm_ttbar_pf, 79700[0m |
|
[2024-11-07 11:44:20,481] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-07 11:44:20,487] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-07 11:44:20,495] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-07 11:44:20,502] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-07 11:44:20,509] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-07 11:44:20,516] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-07 11:44:20,523] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-07 11:44:20,531] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-07 11:44:20,538] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80000[0m |
|
[2024-11-07 11:44:20,546] INFO: [0;34mvalid_dataset: clic_edm_ww_fullhad_pf, 80100[0m |
|
[2024-11-07 11:44:58,920] INFO: [0;31mInitiating epoch #5 train run on device rank=0[0m |
|
[2024-11-07 18:29:31,143] INFO: [0;31mInitiating epoch #5 valid run on device rank=0[0m |
|
[2024-11-07 18:39:59,162] INFO: [1mRank 0: epoch=5 / 50 train_loss=2.1360 valid_loss=2.1386 stale=0 epoch_train_time=404.54m epoch_valid_time=10.36m epoch_total_time=415.0m eta=3735.0m[0m |
|
[2024-11-07 18:39:59,337] INFO: [0;31mInitiating epoch #6 train run on device rank=0[0m |
|
[2024-11-07 23:25:58,002] INFO: [0;31mInitiating epoch #6 valid run on device rank=0[0m |
|
[2024-11-07 23:36:28,484] INFO: [1mRank 0: epoch=6 / 50 train_loss=2.0752 valid_loss=2.0869 stale=0 epoch_train_time=285.98m epoch_valid_time=10.4m epoch_total_time=296.49m eta=5217.6m[0m |
|
[2024-11-07 23:36:28,512] INFO: [0;31mInitiating epoch #7 train run on device rank=0[0m |
|
[2024-11-08 04:22:24,580] INFO: [0;31mInitiating epoch #7 valid run on device rank=0[0m |
|
[2024-11-08 04:32:53,928] INFO: [1mRank 0: epoch=7 / 50 train_loss=2.0329 valid_loss=2.0542 stale=0 epoch_train_time=285.93m epoch_valid_time=10.38m epoch_total_time=296.42m eta=6191.5m[0m |
|
[2024-11-08 04:32:54,096] INFO: [0;31mInitiating epoch #8 train run on device rank=0[0m |
|
[2024-11-08 09:19:05,797] INFO: [0;31mInitiating epoch #8 valid run on device rank=0[0m |
|
[2024-11-08 09:29:42,252] INFO: [1mRank 0: epoch=8 / 50 train_loss=2.0020 valid_loss=2.0320 stale=0 epoch_train_time=286.2m epoch_valid_time=10.5m epoch_total_time=296.8m eta=6849.8m[0m |
|
[2024-11-08 09:29:42,318] INFO: [0;31mInitiating epoch #9 train run on device rank=0[0m |
|
[2024-11-08 14:16:25,441] INFO: [0;31mInitiating epoch #9 valid run on device rank=0[0m |
|
[2024-11-08 14:26:56,005] INFO: [1mRank 0: epoch=9 / 50 train_loss=1.9780 valid_loss=2.0092 stale=0 epoch_train_time=286.72m epoch_valid_time=10.4m epoch_total_time=297.23m eta=7297.8m[0m |
|
[2024-11-08 14:26:56,037] INFO: [0;31mInitiating epoch #10 train run on device rank=0[0m |
|
[2024-11-08 19:14:35,797] INFO: [0;31mInitiating epoch #10 valid run on device rank=0[0m |
|
[2024-11-08 19:25:05,818] INFO: [1mRank 0: epoch=10 / 50 train_loss=1.9588 valid_loss=1.9940 stale=0 epoch_train_time=287.66m epoch_valid_time=10.4m epoch_total_time=298.16m eta=7600.5m[0m |
|
[2024-11-08 19:25:05,836] INFO: [0;31mInitiating epoch #11 train run on device rank=0[0m |
|
[2024-11-09 00:11:18,362] INFO: [0;31mInitiating epoch #11 valid run on device rank=0[0m |
|
[2024-11-09 00:21:46,801] INFO: [1mRank 0: epoch=11 / 50 train_loss=1.9427 valid_loss=1.9812 stale=0 epoch_train_time=286.21m epoch_valid_time=10.39m epoch_total_time=296.68m eta=7788.6m[0m |
|
[2024-11-09 00:21:46,822] INFO: [0;31mInitiating epoch #12 train run on device rank=0[0m |
|
[2024-11-09 05:07:38,020] INFO: [0;31mInitiating epoch #12 valid run on device rank=0[0m |
|
[2024-11-09 05:18:04,820] INFO: [1mRank 0: epoch=12 / 50 train_loss=1.9289 valid_loss=1.9702 stale=0 epoch_train_time=285.85m epoch_valid_time=10.35m epoch_total_time=296.3m eta=7894.8m[0m |
|
[2024-11-09 05:18:04,841] INFO: [0;31mInitiating epoch #13 train run on device rank=0[0m |
|
[2024-11-09 10:04:46,920] INFO: [0;31mInitiating epoch #13 valid run on device rank=0[0m |
|
[2024-11-09 10:15:15,577] INFO: [1mRank 0: epoch=13 / 50 train_loss=1.9158 valid_loss=1.9599 stale=0 epoch_train_time=286.7m epoch_valid_time=10.37m epoch_total_time=297.18m eta=7941.6m[0m |
|
[2024-11-09 10:15:15,595] INFO: [0;31mInitiating epoch #14 train run on device rank=0[0m |
|
[2024-11-09 15:01:56,180] INFO: [0;31mInitiating epoch #14 valid run on device rank=0[0m |
|
[2024-11-09 15:12:18,008] INFO: [1mRank 0: epoch=14 / 50 train_loss=1.9037 valid_loss=1.9494 stale=0 epoch_train_time=286.68m epoch_valid_time=10.25m epoch_total_time=297.04m eta=7938.8m[0m |
|
[2024-11-09 15:12:18,032] INFO: [0;31mInitiating epoch #15 train run on device rank=0[0m |
|
[2024-11-09 19:58:57,038] INFO: [0;31mInitiating epoch #15 valid run on device rank=0[0m |
|
[2024-11-09 20:09:24,231] INFO: [1mRank 0: epoch=15 / 50 train_loss=1.8933 valid_loss=1.9422 stale=0 epoch_train_time=286.65m epoch_valid_time=10.34m epoch_total_time=297.1m eta=7897.0m[0m |
|
[2024-11-09 20:09:24,247] INFO: [0;31mInitiating epoch #16 train run on device rank=0[0m |
|
[2024-11-10 00:56:02,395] INFO: [0;31mInitiating epoch #16 valid run on device rank=0[0m |
|
[2024-11-10 01:06:24,170] INFO: [1mRank 0: epoch=16 / 50 train_loss=1.8838 valid_loss=1.9355 stale=0 epoch_train_time=286.64m epoch_valid_time=10.25m epoch_total_time=297.0m eta=7823.0m[0m |
|
[2024-11-10 01:06:24,214] INFO: [0;31mInitiating epoch #17 train run on device rank=0[0m |
|
|