24-0830-wanda-llama3.1-8B / sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.4.csv
Vui Seng Chua
Add content
37aba61
row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max
0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1005859375,0.39990234375,0.736328125,0.39990234375,0.0,0.1484375,1.0,0.39990234375,0.0625,0.375,0.9375
1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.10595703125,0.3974609375,0.7314453125,0.39990234375,0.0,0.1640625,1.0,0.39990234375,0.0625,0.375,0.9375
2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.12109375,0.4033203125,0.697265625,0.39990234375,0.0,0.1484375,1.0,0.39990234375,0.0625,0.375,0.875
3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.0439453125,0.322265625,0.998046875,0.39990234375,0.0,0.3046875,1.0,0.39990234375,0.0,0.3125,1.0
4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3310546875,0.39892578125,0.49755859375,0.3999023735523224,0.0390625,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34375,0.3984375,0.5087890625,0.3999023735523224,0.046875,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.26806640625,0.400390625,0.51025390625,0.3999721109867096,0.015625,0.40625,0.8671875,0.3999721109867096,0.0,0.375,1.0
7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.23974609375,0.400390625,0.59716796875,0.39990234375,0.0,0.421875,1.0,0.39990234375,0.0,0.375,0.9375
8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.23681640625,0.400390625,0.57861328125,0.39990234375,0.0,0.421875,1.0,0.39990234375,0.0,0.375,0.9375
9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.24169921875,0.39990234375,0.54736328125,0.39990234375,0.0078125,0.421875,1.0,0.39990234375,0.0,0.375,0.9375
10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1552734375,0.38818359375,0.91796875,0.39990234375,0.015625,0.3515625,1.0,0.39990234375,0.0,0.375,1.0
11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33740234375,0.3984375,0.49951171875,0.3999023735523224,0.0859375,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34228515625,0.3984375,0.4873046875,0.3999023735523224,0.0703125,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3134765625,0.40087890625,0.498046875,0.3999721109867096,0.0,0.4140625,0.7421875,0.3999721109867096,0.0,0.375,1.0
14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.27880859375,0.40087890625,0.52685546875,0.39990234375,0.0,0.4296875,1.0,0.39990234375,0.0,0.375,0.9375
15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.28076171875,0.40087890625,0.548828125,0.39990234375,0.0,0.4296875,1.0,0.39990234375,0.0,0.375,0.9375
16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31298828125,0.400390625,0.50732421875,0.39990234375,0.03125,0.421875,1.0,0.39990234375,0.0,0.375,0.9375
17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.169921875,0.34814453125,0.912109375,0.39990234375,0.0,0.3515625,1.0,0.39990234375,0.0,0.375,1.0
18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34521484375,0.39892578125,0.49267578125,0.3999023735523224,0.0546875,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3369140625,0.3984375,0.48876953125,0.3999023735523224,0.109375,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.29931640625,0.400390625,0.5205078125,0.3999721109867096,0.0078125,0.4140625,0.78125,0.3999721109867096,0.0,0.375,1.0
21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.310546875,0.400390625,0.49658203125,0.39990234375,0.0,0.4140625,1.0,0.39990234375,0.0,0.375,1.0
22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.29541015625,0.40087890625,0.49072265625,0.39990234375,0.0078125,0.421875,1.0,0.39990234375,0.0,0.375,0.875
23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32470703125,0.400390625,0.49169921875,0.39990234375,0.046875,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1865234375,0.38427734375,0.68994140625,0.39990234375,0.0234375,0.390625,0.8984375,0.39990234375,0.0,0.375,1.0
25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3486328125,0.39892578125,0.48876953125,0.3999023735523224,0.0390625,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3349609375,0.39892578125,0.49169921875,0.3999023735523224,0.1015625,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.30419921875,0.3994140625,0.4921875,0.3999721109867096,0.0078125,0.40625,0.75,0.3999721109867096,0.0,0.375,0.9375
28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.2998046875,0.40087890625,0.49462890625,0.39990234375,0.0,0.421875,1.0,0.39990234375,0.0,0.375,1.0
29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.298828125,0.400390625,0.49560546875,0.39990234375,0.0,0.421875,1.0,0.39990234375,0.0,0.375,0.9375
30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32275390625,0.39990234375,0.509765625,0.39990234375,0.0234375,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1865234375,0.36181640625,0.8828125,0.39990234375,0.0078125,0.359375,0.9921875,0.39990234375,0.0,0.375,1.0
32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.337890625,0.39990234375,0.4677734375,0.3999023735523224,0.0234375,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3466796875,0.39990234375,0.46923828125,0.3999023735523224,0.09375,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.31787109375,0.400390625,0.48388671875,0.3999721109867096,0.015625,0.3984375,0.9375,0.3999721109867096,0.0,0.375,1.0
35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.29833984375,0.400390625,0.49609375,0.39990234375,0.0078125,0.421875,0.875,0.39990234375,0.0,0.375,0.9375
36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.28662109375,0.40087890625,0.48779296875,0.39990234375,0.0,0.421875,0.8046875,0.39990234375,0.0,0.375,1.0
37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32470703125,0.39990234375,0.5029296875,0.39990234375,0.0234375,0.4140625,1.0,0.39990234375,0.0,0.375,1.0
38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1396484375,0.3388671875,0.85498046875,0.39990234375,0.0078125,0.359375,0.9765625,0.39990234375,0.0,0.375,1.0
39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33251953125,0.400390625,0.4619140625,0.3999023735523224,0.0078125,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.333984375,0.39990234375,0.482421875,0.3999023735523224,0.0703125,0.3984375,1.0,0.3999023735523224,0.0,0.375,0.9375
41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.32275390625,0.39990234375,0.48974609375,0.3999721109867096,0.0,0.3984375,0.8046875,0.3999721109867096,0.0,0.375,1.0
42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.314453125,0.400390625,0.482421875,0.39990234375,0.0,0.4140625,0.8046875,0.39990234375,0.0,0.375,0.9375
43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.2998046875,0.4013671875,0.47998046875,0.39990234375,0.0078125,0.421875,0.8515625,0.39990234375,0.0,0.375,0.9375
44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31982421875,0.400390625,0.478515625,0.39990234375,0.0234375,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1962890625,0.3837890625,0.7177734375,0.39990234375,0.0078125,0.3828125,0.8984375,0.39990234375,0.0,0.375,1.0
46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3359375,0.400390625,0.4658203125,0.3999023735523224,0.015625,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34521484375,0.39990234375,0.46533203125,0.3999023735523224,0.0703125,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.32861328125,0.39990234375,0.49365234375,0.3999721109867096,0.0,0.3984375,0.8515625,0.3999721109867096,0.0,0.375,1.0
49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.30908203125,0.400390625,0.474609375,0.39990234375,0.0,0.4140625,0.7734375,0.39990234375,0.0,0.375,0.9375
50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31201171875,0.40087890625,0.4736328125,0.39990234375,0.0,0.4140625,0.8125,0.39990234375,0.0,0.375,0.9375
51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32763671875,0.3994140625,0.4677734375,0.39990234375,0.0234375,0.40625,0.9921875,0.39990234375,0.0,0.375,0.9375
52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.271484375,0.39453125,0.5712890625,0.39990234375,0.0078125,0.40625,0.875,0.39990234375,0.0,0.375,1.0
53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33447265625,0.400390625,0.44921875,0.3999023735523224,0.015625,0.40625,0.6328125,0.3999023735523224,0.0,0.375,1.0
54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33154296875,0.400390625,0.44921875,0.3999023735523224,0.0703125,0.40625,0.609375,0.3999023735523224,0.0,0.375,1.0
55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3212890625,0.400390625,0.47998046875,0.3999721109867096,0.0,0.3984375,0.859375,0.3999721109867096,0.0,0.375,1.0
56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3193359375,0.400390625,0.48388671875,0.39990234375,0.0,0.40625,0.734375,0.39990234375,0.0,0.375,1.0
57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.306640625,0.400390625,0.46875,0.39990234375,0.0,0.4140625,0.7734375,0.39990234375,0.0,0.375,0.9375
58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33984375,0.39990234375,0.47021484375,0.39990234375,0.0234375,0.40625,0.9921875,0.39990234375,0.0,0.375,0.9375
59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.2197265625,0.3935546875,0.6259765625,0.39990234375,0.0078125,0.3984375,0.7890625,0.39990234375,0.0,0.375,1.0
60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33544921875,0.400390625,0.4736328125,0.3999023735523224,0.0078125,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3330078125,0.400390625,0.4755859375,0.3999023735523224,0.03125,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3232421875,0.400390625,0.47412109375,0.3999721109867096,0.046875,0.3984375,0.828125,0.3999721109867096,0.0,0.375,0.9375
63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31689453125,0.400390625,0.47021484375,0.39990234375,0.0,0.40625,0.75,0.39990234375,0.0,0.375,0.9375
64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31591796875,0.40087890625,0.46923828125,0.39990234375,0.0,0.40625,0.7890625,0.39990234375,0.0,0.375,0.9375
65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33251953125,0.39990234375,0.46630859375,0.39990234375,0.03125,0.3984375,0.9921875,0.39990234375,0.0,0.375,0.9375
66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.2197265625,0.39111328125,0.65283203125,0.39990234375,0.015625,0.3984375,0.8203125,0.39990234375,0.0,0.375,1.0
67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.333984375,0.40087890625,0.44775390625,0.3999023735523224,0.0078125,0.40625,0.6171875,0.3999023735523224,0.0,0.375,1.0
68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33740234375,0.400390625,0.44921875,0.3999023735523224,0.046875,0.3984375,0.640625,0.3999023735523224,0.0,0.375,0.9375
69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3095703125,0.400390625,0.4853515625,0.3999721109867096,0.0,0.3984375,0.828125,0.3999721109867096,0.0,0.375,1.0
70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31787109375,0.400390625,0.47119140625,0.39990234375,0.0,0.40625,0.7421875,0.39990234375,0.0,0.375,0.9375
71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31396484375,0.40087890625,0.47265625,0.39990234375,0.0,0.4140625,0.828125,0.39990234375,0.0,0.375,0.9375
72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31396484375,0.39990234375,0.4619140625,0.39990234375,0.0390625,0.40625,0.9921875,0.39990234375,0.0,0.375,0.9375
73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.19482421875,0.36962890625,0.7080078125,0.39990234375,0.0078125,0.3828125,0.8671875,0.39990234375,0.0,0.375,1.0
74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3173828125,0.40087890625,0.45458984375,0.3999023735523224,0.015625,0.40625,0.640625,0.3999023735523224,0.0,0.375,1.0
75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34375,0.400390625,0.447265625,0.3999023735523224,0.046875,0.3984375,0.6484375,0.3999023735523224,0.0,0.375,1.0
76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.306640625,0.39990234375,0.48095703125,0.3999721109867096,0.0078125,0.40625,0.8046875,0.3999721109867096,0.0,0.375,1.0
77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31103515625,0.400390625,0.4638671875,0.39990234375,0.0,0.40625,0.7890625,0.39990234375,0.0,0.375,0.9375
78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3212890625,0.40087890625,0.4658203125,0.39990234375,0.0,0.40625,0.8671875,0.39990234375,0.0,0.375,0.9375
79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33154296875,0.39990234375,0.455078125,0.39990234375,0.0546875,0.3984375,0.9921875,0.39990234375,0.0,0.375,0.9375
80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.240234375,0.3818359375,0.70361328125,0.39990234375,0.0078125,0.3984375,0.84375,0.39990234375,0.0,0.375,1.0
81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.326171875,0.40087890625,0.45166015625,0.3999023735523224,0.015625,0.40625,0.6484375,0.3999023735523224,0.0,0.375,1.0
82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3359375,0.400390625,0.45361328125,0.3999023735523224,0.046875,0.3984375,0.6796875,0.3999023735523224,0.0,0.375,1.0
83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.31689453125,0.3994140625,0.482421875,0.3999721109867096,0.0,0.40625,0.875,0.3999721109867096,0.0,0.375,1.0
84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31884765625,0.40087890625,0.46240234375,0.39990234375,0.0,0.40625,0.765625,0.39990234375,0.0,0.375,1.0
85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31103515625,0.40087890625,0.4580078125,0.39990234375,0.0078125,0.40625,0.8359375,0.39990234375,0.0,0.375,0.9375
86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32861328125,0.3994140625,0.46240234375,0.39990234375,0.046875,0.3984375,0.9921875,0.39990234375,0.0,0.375,0.9375
87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.18359375,0.40087890625,0.60302734375,0.39990234375,0.015625,0.40625,0.7734375,0.39990234375,0.0,0.375,1.0
88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.337890625,0.40087890625,0.44873046875,0.3999023735523224,0.0078125,0.40625,0.640625,0.3999023735523224,0.0,0.375,1.0
89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3408203125,0.400390625,0.4443359375,0.3999023735523224,0.0390625,0.40625,0.609375,0.3999023735523224,0.0,0.375,1.0
90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3095703125,0.400390625,0.48095703125,0.3999721109867096,0.0,0.40625,0.8046875,0.3999721109867096,0.0,0.375,1.0
91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31884765625,0.400390625,0.478515625,0.39990234375,0.0,0.40625,0.8046875,0.39990234375,0.0,0.375,1.0
92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3232421875,0.39990234375,0.46240234375,0.39990234375,0.0,0.40625,0.890625,0.39990234375,0.0,0.375,0.9375
93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33837890625,0.39990234375,0.462890625,0.39990234375,0.03125,0.3984375,0.9921875,0.39990234375,0.0,0.375,0.9375
94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.20947265625,0.39794921875,0.6337890625,0.39990234375,0.015625,0.40625,0.796875,0.39990234375,0.0,0.375,1.0
95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33203125,0.40087890625,0.44873046875,0.3999023735523224,0.0078125,0.40625,0.640625,0.3999023735523224,0.0,0.375,0.9375
96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33984375,0.400390625,0.44970703125,0.3999023735523224,0.0390625,0.40625,0.625,0.3999023735523224,0.0,0.375,1.0
97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3134765625,0.400390625,0.48876953125,0.3999721109867096,0.0,0.40625,0.875,0.3999721109867096,0.0,0.375,1.0
98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3212890625,0.400390625,0.466796875,0.39990234375,0.0078125,0.40625,0.7421875,0.39990234375,0.0,0.375,0.9375
99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3173828125,0.400390625,0.45751953125,0.39990234375,0.0078125,0.40625,0.921875,0.39990234375,0.0,0.375,0.9375
100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.310546875,0.400390625,0.478515625,0.39990234375,0.0078125,0.40625,0.9921875,0.39990234375,0.0,0.375,0.9375
101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.22021484375,0.3876953125,0.7177734375,0.39990234375,0.015625,0.390625,0.8984375,0.39990234375,0.0,0.375,1.0
102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3349609375,0.400390625,0.45458984375,0.3999023735523224,0.0078125,0.40625,0.6171875,0.3999023735523224,0.0,0.375,1.0
103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3369140625,0.400390625,0.4482421875,0.3999023735523224,0.03125,0.40625,0.6171875,0.3999023735523224,0.0,0.375,1.0
104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.29638671875,0.400390625,0.48681640625,0.3999721109867096,0.03125,0.40625,0.84375,0.3999721109867096,0.0,0.375,1.0
105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3232421875,0.40087890625,0.4609375,0.39990234375,0.0078125,0.40625,0.734375,0.39990234375,0.0,0.375,1.0
106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3173828125,0.400390625,0.458984375,0.39990234375,0.0078125,0.4140625,0.8671875,0.39990234375,0.0,0.375,0.9375
107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3271484375,0.400390625,0.47412109375,0.39990234375,0.0234375,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1796875,0.37255859375,0.76904296875,0.39990234375,0.015625,0.375,0.8984375,0.39990234375,0.0,0.375,1.0
109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.330078125,0.400390625,0.44873046875,0.3999023735523224,0.0,0.40625,0.6171875,0.3999023735523224,0.0,0.375,1.0
110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3330078125,0.400390625,0.44970703125,0.3999023735523224,0.03125,0.40625,0.625,0.3999023735523224,0.0,0.375,0.9375
111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.31591796875,0.39990234375,0.4912109375,0.3999721109867096,0.0390625,0.3984375,0.8671875,0.3999721109867096,0.0,0.375,1.0
112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31982421875,0.40087890625,0.46923828125,0.39990234375,0.0078125,0.4140625,0.734375,0.39990234375,0.0,0.375,0.9375
113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31884765625,0.400390625,0.46435546875,0.39990234375,0.0,0.4140625,0.890625,0.39990234375,0.0,0.375,0.9375
114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31201171875,0.39990234375,0.4794921875,0.39990234375,0.015625,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1708984375,0.38037109375,0.73876953125,0.39990234375,0.0078125,0.390625,0.8671875,0.39990234375,0.0,0.375,1.0
116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33447265625,0.400390625,0.45947265625,0.3999023735523224,0.0078125,0.40625,0.6640625,0.3999023735523224,0.0,0.375,1.0
117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3408203125,0.400390625,0.4482421875,0.3999023735523224,0.0234375,0.40625,0.6875,0.3999023735523224,0.0,0.375,1.0
118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.31396484375,0.400390625,0.48193359375,0.3999721109867096,0.015625,0.40625,0.7734375,0.3999721109867096,0.0,0.375,1.0
119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.330078125,0.400390625,0.4609375,0.39990234375,0.0078125,0.4140625,0.7421875,0.39990234375,0.0,0.375,1.0
120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32421875,0.40087890625,0.46337890625,0.39990234375,0.0,0.4140625,0.8515625,0.39990234375,0.0,0.375,0.9375
121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33447265625,0.400390625,0.47119140625,0.39990234375,0.03125,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1875,0.3798828125,0.80615234375,0.39990234375,0.015625,0.3828125,0.9375,0.39990234375,0.0,0.375,1.0
123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3359375,0.400390625,0.44873046875,0.3999023735523224,0.0,0.40625,0.75,0.3999023735523224,0.0,0.375,1.0
124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33984375,0.400390625,0.4501953125,0.3999023735523224,0.0234375,0.40625,0.6875,0.3999023735523224,0.0,0.375,1.0
125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3095703125,0.39990234375,0.4921875,0.3999721109867096,0.0390625,0.40625,0.859375,0.3999721109867096,0.0,0.375,1.0
126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31298828125,0.40087890625,0.47265625,0.39990234375,0.0078125,0.4140625,0.8359375,0.39990234375,0.0,0.375,0.9375
127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32666015625,0.40087890625,0.48486328125,0.39990234375,0.0,0.4140625,0.90625,0.39990234375,0.0,0.375,1.0
128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3203125,0.400390625,0.46484375,0.39990234375,0.015625,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1689453125,0.3203125,0.92822265625,0.39990234375,0.015625,0.34375,1.0,0.39990234375,0.0,0.375,1.0
130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33251953125,0.400390625,0.45166015625,0.3999023735523224,0.0078125,0.40625,0.6953125,0.3999023735523224,0.0,0.375,1.0
131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34228515625,0.400390625,0.4482421875,0.3999023735523224,0.03125,0.40625,0.7421875,0.3999023735523224,0.0,0.375,0.9375
132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.31787109375,0.39990234375,0.47607421875,0.3999721109867096,0.0078125,0.40625,0.796875,0.3999721109867096,0.0,0.375,1.0
133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.32763671875,0.40087890625,0.4697265625,0.39990234375,0.0,0.4140625,0.7109375,0.39990234375,0.0,0.375,0.9375
134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.306640625,0.40087890625,0.48828125,0.39990234375,0.0,0.4140625,0.8671875,0.39990234375,0.0,0.375,0.9375
135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3232421875,0.39990234375,0.4794921875,0.39990234375,0.015625,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1298828125,0.3603515625,0.89990234375,0.39990234375,0.0078125,0.3671875,0.9921875,0.39990234375,0.0,0.375,1.0
137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33642578125,0.400390625,0.4521484375,0.3999023735523224,0.0,0.40625,0.6796875,0.3999023735523224,0.0,0.375,0.9375
138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3349609375,0.400390625,0.45166015625,0.3999023735523224,0.015625,0.40625,0.7578125,0.3999023735523224,0.0,0.375,1.0
139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.30810546875,0.400390625,0.48193359375,0.3999721109867096,0.0078125,0.40625,0.8125,0.3999721109867096,0.0,0.375,1.0
140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31591796875,0.40087890625,0.47119140625,0.39990234375,0.0078125,0.4140625,0.734375,0.39990234375,0.0,0.375,0.9375
141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31787109375,0.40087890625,0.4716796875,0.39990234375,0.0078125,0.4140625,0.875,0.39990234375,0.0,0.375,0.9375
142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32275390625,0.400390625,0.48046875,0.39990234375,0.015625,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.18310546875,0.34619140625,0.912109375,0.39990234375,0.015625,0.3515625,1.0,0.39990234375,0.0,0.375,1.0
144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33447265625,0.400390625,0.45068359375,0.3999023735523224,0.0,0.40625,0.6953125,0.3999023735523224,0.0,0.375,0.9375
145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3466796875,0.400390625,0.45263671875,0.3999023735523224,0.03125,0.40625,0.7734375,0.3999023735523224,0.0,0.375,1.0
146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3173828125,0.400390625,0.48876953125,0.3999721109867096,0.0078125,0.40625,0.765625,0.3999721109867096,0.0,0.375,1.0
147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31689453125,0.40087890625,0.462890625,0.39990234375,0.0078125,0.4140625,0.671875,0.39990234375,0.0,0.375,1.0
148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31201171875,0.40087890625,0.47119140625,0.39990234375,0.0,0.4140625,0.7890625,0.39990234375,0.0,0.375,0.9375
149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3232421875,0.39990234375,0.47314453125,0.39990234375,0.015625,0.40625,1.0,0.39990234375,0.0,0.375,1.0
150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1513671875,0.36376953125,0.931640625,0.39990234375,0.0,0.375,1.0,0.39990234375,0.0,0.375,1.0
151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33740234375,0.400390625,0.44921875,0.3999023735523224,0.0078125,0.40625,0.7421875,0.3999023735523224,0.0,0.375,1.0
152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.330078125,0.400390625,0.455078125,0.3999023735523224,0.0234375,0.40625,0.7578125,0.3999023735523224,0.0,0.375,1.0
153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.30517578125,0.400390625,0.486328125,0.3999721109867096,0.03125,0.4140625,0.7421875,0.3999721109867096,0.0,0.375,1.0
154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31494140625,0.40087890625,0.4716796875,0.39990234375,0.0,0.4140625,0.6484375,0.39990234375,0.0,0.375,0.9375
155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3134765625,0.40087890625,0.46923828125,0.39990234375,0.0,0.4140625,0.8515625,0.39990234375,0.0,0.375,1.0
156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32666015625,0.39990234375,0.4677734375,0.39990234375,0.0234375,0.4140625,1.0,0.39990234375,0.0,0.375,0.875
157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.171875,0.38134765625,0.78173828125,0.39990234375,0.0078125,0.3828125,0.9296875,0.39990234375,0.0,0.375,1.0
158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34521484375,0.400390625,0.453125,0.3999023735523224,0.0078125,0.40625,0.7109375,0.3999023735523224,0.0,0.375,1.0
159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3408203125,0.400390625,0.45068359375,0.3999023735523224,0.015625,0.3984375,0.75,0.3999023735523224,0.0,0.375,0.9375
160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.298828125,0.39990234375,0.47998046875,0.3999721109867096,0.015625,0.4140625,0.7265625,0.3999721109867096,0.0,0.375,1.0
161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31103515625,0.40087890625,0.46875,0.39990234375,0.0,0.4140625,0.7265625,0.39990234375,0.0,0.375,1.0
162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31884765625,0.40087890625,0.46240234375,0.39990234375,0.0,0.4140625,0.84375,0.39990234375,0.0,0.375,0.875
163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.330078125,0.39990234375,0.46484375,0.39990234375,0.0234375,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.15966796875,0.388671875,0.72705078125,0.39990234375,0.0078125,0.3828125,0.890625,0.39990234375,0.0,0.375,1.0
165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34375,0.400390625,0.4462890625,0.3999023735523224,0.0,0.40625,0.7421875,0.3999023735523224,0.0,0.375,1.0
166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3388671875,0.400390625,0.451171875,0.3999023735523224,0.0234375,0.3984375,0.7578125,0.3999023735523224,0.0,0.375,1.0
167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.29638671875,0.400390625,0.484375,0.3999721109867096,0.0,0.4140625,0.6953125,0.3999721109867096,0.0,0.375,1.0
168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31689453125,0.400390625,0.45947265625,0.39990234375,0.0,0.4140625,0.734375,0.39990234375,0.0,0.375,0.9375
169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3173828125,0.400390625,0.46875,0.39990234375,0.0,0.4140625,0.84375,0.39990234375,0.0,0.375,0.9375
170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31884765625,0.39990234375,0.4873046875,0.39990234375,0.015625,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.14599609375,0.34130859375,0.81201171875,0.39990234375,0.0078125,0.359375,0.9765625,0.39990234375,0.0,0.375,1.0
172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34228515625,0.400390625,0.44970703125,0.3999023735523224,0.0,0.40625,0.7421875,0.3999023735523224,0.0,0.375,1.0
173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34521484375,0.400390625,0.45263671875,0.3999023735523224,0.015625,0.3984375,0.78125,0.3999023735523224,0.0,0.375,0.9375
174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3056640625,0.40087890625,0.4814453125,0.3999721109867096,0.0,0.421875,0.703125,0.3999721109867096,0.0,0.375,1.0
175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31884765625,0.400390625,0.46728515625,0.39990234375,0.0,0.4140625,0.6796875,0.39990234375,0.0,0.375,0.9375
176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.318359375,0.40087890625,0.490234375,0.39990234375,0.0,0.4140625,0.8125,0.39990234375,0.0,0.375,0.9375
177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31201171875,0.39990234375,0.47900390625,0.39990234375,0.015625,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.140625,0.33642578125,0.95654296875,0.39990234375,0.015625,0.34375,1.0,0.39990234375,0.0,0.375,1.0
179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34375,0.400390625,0.44970703125,0.3999023735523224,0.0,0.40625,0.734375,0.3999023735523224,0.0,0.375,0.9375
180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3427734375,0.400390625,0.44580078125,0.3999023735523224,0.0234375,0.3984375,0.7578125,0.3999023735523224,0.0,0.375,1.0
181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.2890625,0.400390625,0.49267578125,0.3999721109867096,0.015625,0.421875,0.671875,0.3999721109867096,0.0,0.375,1.0
182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3212890625,0.40087890625,0.4677734375,0.39990234375,0.0,0.4140625,0.6484375,0.39990234375,0.0,0.375,0.9375
183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31103515625,0.40087890625,0.49462890625,0.39990234375,0.0,0.4140625,0.8359375,0.39990234375,0.0,0.375,0.9375
184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31201171875,0.400390625,0.47509765625,0.39990234375,0.0078125,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1533203125,0.35302734375,0.82568359375,0.39990234375,0.015625,0.3515625,0.9296875,0.39990234375,0.0,0.375,1.0
186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33740234375,0.400390625,0.44970703125,0.3999023735523224,0.0078125,0.40625,0.75,0.3999023735523224,0.0,0.375,1.0
187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3369140625,0.400390625,0.447265625,0.3999023735523224,0.015625,0.3984375,0.78125,0.3999023735523224,0.0,0.375,1.0
188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.29248046875,0.4013671875,0.48974609375,0.3999721109867096,0.015625,0.421875,0.7421875,0.3999721109867096,0.0,0.375,1.0
189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3232421875,0.400390625,0.48046875,0.39990234375,0.0,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3232421875,0.400390625,0.4892578125,0.39990234375,0.0,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31982421875,0.40087890625,0.47998046875,0.39990234375,0.0234375,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.14306640625,0.37255859375,0.794921875,0.39990234375,0.0078125,0.359375,0.9609375,0.39990234375,0.0,0.375,1.0
193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33349609375,0.400390625,0.4521484375,0.3999023735523224,0.0078125,0.40625,0.7109375,0.3999023735523224,0.0,0.375,1.0
194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34228515625,0.400390625,0.4560546875,0.3999023735523224,0.0078125,0.3984375,0.7890625,0.3999023735523224,0.0,0.375,1.0
195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.29736328125,0.40087890625,0.486328125,0.3999721109867096,0.0,0.421875,0.6953125,0.3999721109867096,0.0,0.375,1.0
196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3056640625,0.4013671875,0.4599609375,0.39990234375,0.0,0.4140625,0.6484375,0.39990234375,0.0,0.375,1.0
197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31640625,0.40087890625,0.46630859375,0.39990234375,0.0,0.4140625,0.8515625,0.39990234375,0.0,0.375,0.9375
198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3291015625,0.400390625,0.47119140625,0.39990234375,0.0234375,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.130859375,0.38916015625,0.82421875,0.39990234375,0.015625,0.390625,0.9140625,0.39990234375,0.0,0.375,1.0
200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33544921875,0.40087890625,0.455078125,0.3999023735523224,0.0,0.40625,0.71875,0.3999023735523224,0.0,0.375,1.0
201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34033203125,0.400390625,0.45068359375,0.3999023735523224,0.0234375,0.40625,0.7421875,0.3999023735523224,0.0,0.375,1.0
202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.28857421875,0.4013671875,0.486328125,0.3999721109867096,0.0078125,0.4296875,0.6953125,0.3999721109867096,0.0,0.375,1.0
203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31787109375,0.40087890625,0.490234375,0.39990234375,0.0,0.4140625,1.0,0.39990234375,0.0,0.375,1.0
204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31640625,0.40087890625,0.46826171875,0.39990234375,0.0,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33349609375,0.39990234375,0.46826171875,0.39990234375,0.0078125,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.08544921875,0.421875,0.71240234375,0.39990234375,0.0078125,0.4140625,0.8828125,0.39990234375,0.0,0.375,1.0
207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.337890625,0.400390625,0.44921875,0.3999023735523224,0.0078125,0.40625,0.7734375,0.3999023735523224,0.0,0.375,1.0
208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3349609375,0.400390625,0.4501953125,0.3999023735523224,0.015625,0.40625,0.7421875,0.3999023735523224,0.0,0.375,1.0
209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.306640625,0.4013671875,0.49267578125,0.3999721109867096,0.0,0.4296875,0.7421875,0.3999721109867096,0.0,0.375,1.0
210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.298828125,0.40087890625,0.4736328125,0.39990234375,0.0,0.4140625,0.65625,0.39990234375,0.0,0.375,0.9375
211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31103515625,0.40087890625,0.48095703125,0.39990234375,0.0,0.421875,0.859375,0.39990234375,0.0,0.375,0.9375
212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31005859375,0.400390625,0.48583984375,0.39990234375,0.0078125,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.125,0.357421875,0.9443359375,0.39990234375,0.03125,0.359375,0.9921875,0.39990234375,0.0,0.375,1.0
214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33935546875,0.400390625,0.4638671875,0.3999023735523224,0.0078125,0.40625,0.828125,0.3999023735523224,0.0,0.375,0.9375
215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33740234375,0.40087890625,0.4580078125,0.3999023735523224,0.015625,0.40625,0.8046875,0.3999023735523224,0.0,0.375,1.0
216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.27734375,0.4013671875,0.49951171875,0.3999721109867096,0.0,0.4296875,0.8125,0.3999721109867096,0.0,0.375,0.9375
217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.302734375,0.400390625,0.4716796875,0.39990234375,0.0,0.4140625,0.9375,0.39990234375,0.0,0.375,1.0
218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.306640625,0.40087890625,0.4638671875,0.39990234375,0.0,0.4140625,0.90625,0.39990234375,0.0,0.375,0.9375
219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.29638671875,0.400390625,0.46142578125,0.39990234375,0.0078125,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.05615234375,0.361328125,0.888671875,0.39990234375,0.0,0.3671875,0.9921875,0.39990234375,0.0,0.375,1.0
221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3427734375,0.400390625,0.45947265625,0.3999023735523224,0.0390625,0.40625,0.84375,0.3999023735523224,0.0,0.375,1.0
222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.337890625,0.400390625,0.451171875,0.3999023735523224,0.0390625,0.40625,0.796875,0.3999023735523224,0.0,0.375,1.0
223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.2470703125,0.39990234375,0.5361328125,0.3999721109867096,0.0,0.453125,0.890625,0.3999721109867096,0.0,0.375,1.0
224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0