|
row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max |
|
0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.240234375,0.60009765625,0.87744140625,0.599853515625,0.0,0.6875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.279296875,0.59912109375,0.87158203125,0.599853515625,0.0,0.6796875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.31640625,0.59912109375,0.84521484375,0.599853515625,0.0,0.5546875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.10302734375,0.611328125,1.0,0.599853515625,0.0078125,0.59375,1.0,0.599853515625,0.0,0.625,1.0 |
|
4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51708984375,0.60009765625,0.6728515625,0.599853515625,0.078125,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.53125,0.599609375,0.67724609375,0.599853515625,0.1015625,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4208984375,0.60107421875,0.7314453125,0.5999581813812256,0.046875,0.6171875,0.984375,0.5999581813812256,0.0,0.625,1.0 |
|
7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.3779296875,0.60205078125,0.79345703125,0.599853515625,0.0,0.6640625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.37060546875,0.60107421875,0.77734375,0.599853515625,0.0,0.6640625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.39208984375,0.60205078125,0.7333984375,0.599853515625,0.0234375,0.65625,1.0,0.599853515625,0.125,0.625,1.0 |
|
10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.28466796875,0.62353515625,0.98974609375,0.599853515625,0.0390625,0.5859375,1.0,0.599853515625,0.0,0.625,1.0 |
|
11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52783203125,0.60009765625,0.677734375,0.599853515625,0.1640625,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52734375,0.599609375,0.66845703125,0.599853515625,0.140625,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48828125,0.6015625,0.7109375,0.5999581813812256,0.0,0.6328125,0.9375,0.5999581813812256,0.0,0.625,1.0 |
|
14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.45166015625,0.6025390625,0.7451171875,0.599853515625,0.0,0.6484375,1.0,0.599853515625,0.0,0.625,1.0 |
|
15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.427734375,0.60302734375,0.7607421875,0.599853515625,0.0078125,0.65625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4853515625,0.6025390625,0.7099609375,0.599853515625,0.0546875,0.640625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2900390625,0.54296875,0.98779296875,0.599853515625,0.0078125,0.5625,1.0,0.599853515625,0.0,0.5625,1.0 |
|
18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52880859375,0.60009765625,0.66455078125,0.599853515625,0.1328125,0.6015625,1.0,0.599853515625,0.0,0.625,1.0 |
|
19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.53076171875,0.599609375,0.6650390625,0.599853515625,0.1796875,0.6015625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.474609375,0.60107421875,0.73974609375,0.5999581813812256,0.015625,0.625,0.9140625,0.5999581813812256,0.0625,0.625,1.0 |
|
21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48876953125,0.60107421875,0.705078125,0.599853515625,0.0234375,0.6328125,1.0,0.599853515625,0.0,0.625,1.0 |
|
22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46923828125,0.60205078125,0.69921875,0.599853515625,0.015625,0.640625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49658203125,0.60107421875,0.6923828125,0.599853515625,0.0625,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.32666015625,0.58544921875,0.96142578125,0.599853515625,0.046875,0.5859375,1.0,0.599853515625,0.0,0.625,1.0 |
|
25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.53466796875,0.60009765625,0.66748046875,0.599853515625,0.109375,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.54150390625,0.599609375,0.6611328125,0.599853515625,0.1953125,0.6015625,1.0,0.599853515625,0.0,0.625,1.0 |
|
27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4814453125,0.6005859375,0.71240234375,0.5999581813812256,0.03125,0.6171875,0.9375,0.5999581813812256,0.0,0.625,1.0 |
|
28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4619140625,0.60107421875,0.71728515625,0.599853515625,0.0078125,0.640625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46142578125,0.60107421875,0.71923828125,0.599853515625,0.0078125,0.640625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49609375,0.6015625,0.71923828125,0.599853515625,0.0546875,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.27587890625,0.56298828125,0.9892578125,0.599853515625,0.0234375,0.5703125,1.0,0.599853515625,0.0,0.5625,1.0 |
|
32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52001953125,0.6005859375,0.66064453125,0.599853515625,0.078125,0.609375,1.0,0.599853515625,0.0625,0.625,1.0 |
|
33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.53759765625,0.6005859375,0.66357421875,0.599853515625,0.1875,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48779296875,0.60107421875,0.70263671875,0.5999581813812256,0.0546875,0.609375,0.984375,0.5999581813812256,0.0,0.625,1.0 |
|
35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.44970703125,0.6005859375,0.7177734375,0.599853515625,0.0078125,0.6328125,0.96875,0.599853515625,0.0625,0.625,1.0 |
|
36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.455078125,0.6005859375,0.708984375,0.599853515625,0.015625,0.640625,0.9296875,0.599853515625,0.0625,0.625,1.0 |
|
37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49755859375,0.6005859375,0.70751953125,0.599853515625,0.0625,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.25390625,0.5859375,0.9423828125,0.599853515625,0.0078125,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51953125,0.60107421875,0.658203125,0.599853515625,0.046875,0.609375,1.0,0.599853515625,0.0625,0.625,1.0 |
|
40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52392578125,0.6005859375,0.6630859375,0.599853515625,0.140625,0.6015625,1.0,0.599853515625,0.0,0.625,1.0 |
|
41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48828125,0.6005859375,0.7041015625,0.5999581813812256,0.0,0.609375,0.953125,0.5999581813812256,0.0,0.625,1.0 |
|
42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.47412109375,0.6005859375,0.7080078125,0.599853515625,0.015625,0.6328125,0.9609375,0.599853515625,0.0625,0.625,1.0 |
|
43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46484375,0.6005859375,0.705078125,0.599853515625,0.015625,0.6328125,0.953125,0.599853515625,0.0625,0.625,1.0 |
|
44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.498046875,0.6005859375,0.685546875,0.599853515625,0.0703125,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.3115234375,0.60302734375,0.94775390625,0.599853515625,0.0234375,0.6015625,1.0,0.599853515625,0.0,0.625,1.0 |
|
46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51416015625,0.60107421875,0.658203125,0.599853515625,0.0625,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.6005859375,0.64892578125,0.599853515625,0.1171875,0.6015625,1.0,0.599853515625,0.0,0.625,1.0 |
|
48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48291015625,0.60009765625,0.70654296875,0.5999581813812256,0.0078125,0.609375,0.984375,0.5999581813812256,0.0,0.625,1.0 |
|
49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48291015625,0.60107421875,0.68408203125,0.599853515625,0.0234375,0.625,0.9765625,0.599853515625,0.0625,0.625,1.0 |
|
50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49072265625,0.60205078125,0.6845703125,0.599853515625,0.0078125,0.625,0.8984375,0.599853515625,0.0625,0.625,1.0 |
|
51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.50537109375,0.6005859375,0.67626953125,0.599853515625,0.0703125,0.609375,0.9921875,0.599853515625,0.0625,0.625,1.0 |
|
52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.41748046875,0.5927734375,0.798828125,0.599853515625,0.0234375,0.6171875,0.9921875,0.599853515625,0.0,0.625,1.0 |
|
53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51904296875,0.6015625,0.64892578125,0.599853515625,0.046875,0.609375,0.8203125,0.599853515625,0.0,0.625,1.0 |
|
54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.529296875,0.6005859375,0.650390625,0.599853515625,0.140625,0.6015625,0.8125,0.599853515625,0.0,0.625,1.0 |
|
55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.49072265625,0.60107421875,0.693359375,0.5999581813812256,0.0078125,0.609375,0.9921875,0.5999581813812256,0.0,0.625,1.0 |
|
56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48193359375,0.6005859375,0.70654296875,0.599853515625,0.015625,0.6171875,0.9296875,0.599853515625,0.0,0.625,1.0 |
|
57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48193359375,0.60107421875,0.68701171875,0.599853515625,0.0,0.625,0.953125,0.599853515625,0.0625,0.625,1.0 |
|
58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.50537109375,0.6005859375,0.666015625,0.599853515625,0.0390625,0.609375,0.9921875,0.599853515625,0.0,0.625,1.0 |
|
59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.31689453125,0.6064453125,0.83056640625,0.599853515625,0.03125,0.609375,0.9609375,0.599853515625,0.0,0.625,1.0 |
|
60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5166015625,0.6015625,0.6494140625,0.599853515625,0.03125,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5205078125,0.60107421875,0.65234375,0.599853515625,0.1015625,0.6015625,1.0,0.599853515625,0.0,0.625,1.0 |
|
62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4775390625,0.6005859375,0.705078125,0.5999581813812256,0.0390625,0.609375,0.953125,0.5999581813812256,0.0,0.625,1.0 |
|
63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4814453125,0.60107421875,0.6943359375,0.599853515625,0.015625,0.6171875,0.9375,0.599853515625,0.0625,0.625,1.0 |
|
64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47607421875,0.6005859375,0.68701171875,0.599853515625,0.0078125,0.6171875,0.953125,0.599853515625,0.0625,0.625,1.0 |
|
65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.5146484375,0.60009765625,0.67529296875,0.599853515625,0.0703125,0.6015625,1.0,0.599853515625,0.125,0.625,1.0 |
|
66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.337890625,0.60888671875,0.8427734375,0.599853515625,0.0625,0.6171875,0.9453125,0.599853515625,0.0,0.625,1.0 |
|
67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.6015625,0.65576171875,0.599853515625,0.0234375,0.609375,0.8515625,0.599853515625,0.0,0.625,1.0 |
|
68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51953125,0.60107421875,0.65185546875,0.599853515625,0.0859375,0.609375,0.8359375,0.599853515625,0.0625,0.625,1.0 |
|
69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.47705078125,0.60009765625,0.70703125,0.5999581813812256,0.0078125,0.609375,0.9609375,0.5999581813812256,0.0,0.625,1.0 |
|
70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.490234375,0.6005859375,0.6982421875,0.599853515625,0.015625,0.6171875,0.953125,0.599853515625,0.0625,0.625,1.0 |
|
71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4755859375,0.6005859375,0.68701171875,0.599853515625,0.0,0.625,0.9453125,0.599853515625,0.0625,0.625,1.0 |
|
72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49951171875,0.60009765625,0.6640625,0.599853515625,0.0859375,0.609375,0.9921875,0.599853515625,0.0625,0.625,1.0 |
|
73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2880859375,0.6279296875,0.84130859375,0.599853515625,0.015625,0.625,0.96875,0.599853515625,0.0,0.625,1.0 |
|
74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52001953125,0.6015625,0.65625,0.599853515625,0.03125,0.609375,0.8359375,0.599853515625,0.0,0.625,1.0 |
|
75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.521484375,0.6005859375,0.65576171875,0.599853515625,0.1171875,0.6015625,0.828125,0.599853515625,0.0,0.625,1.0 |
|
76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.47412109375,0.6005859375,0.69921875,0.5999581813812256,0.0078125,0.6171875,0.953125,0.5999581813812256,0.0,0.625,1.0 |
|
77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48046875,0.6005859375,0.673828125,0.599853515625,0.015625,0.609375,0.953125,0.599853515625,0.0625,0.625,1.0 |
|
78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4892578125,0.60107421875,0.67529296875,0.599853515625,0.015625,0.6171875,0.9765625,0.599853515625,0.0625,0.625,1.0 |
|
79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.50439453125,0.60009765625,0.67578125,0.599853515625,0.1171875,0.609375,1.0,0.599853515625,0.0625,0.625,1.0 |
|
80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.3662109375,0.59521484375,0.88818359375,0.599853515625,0.03125,0.6171875,0.984375,0.599853515625,0.0,0.625,1.0 |
|
81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.517578125,0.60107421875,0.658203125,0.599853515625,0.015625,0.609375,0.828125,0.599853515625,0.0,0.625,1.0 |
|
82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52880859375,0.6005859375,0.65576171875,0.599853515625,0.109375,0.609375,0.8515625,0.599853515625,0.0,0.625,1.0 |
|
83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.47607421875,0.60009765625,0.72412109375,0.5999581813812256,0.0,0.6171875,0.96875,0.5999581813812256,0.0,0.625,1.0 |
|
84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4853515625,0.6015625,0.68701171875,0.599853515625,0.015625,0.609375,0.953125,0.599853515625,0.0,0.625,1.0 |
|
85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47900390625,0.6015625,0.6767578125,0.599853515625,0.0234375,0.6171875,0.984375,0.599853515625,0.0625,0.625,1.0 |
|
86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49609375,0.60009765625,0.66796875,0.599853515625,0.0703125,0.6015625,0.9921875,0.599853515625,0.0625,0.625,1.0 |
|
87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.35009765625,0.60498046875,0.8291015625,0.599853515625,0.046875,0.6171875,0.9765625,0.599853515625,0.0,0.625,1.0 |
|
88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5107421875,0.6015625,0.65869140625,0.599853515625,0.0390625,0.609375,0.828125,0.599853515625,0.0625,0.625,1.0 |
|
89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51708984375,0.6005859375,0.65576171875,0.599853515625,0.09375,0.609375,0.8203125,0.599853515625,0.0,0.625,1.0 |
|
90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4736328125,0.6015625,0.728515625,0.5999581813812256,0.015625,0.6171875,0.984375,0.5999581813812256,0.0625,0.625,1.0 |
|
91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.50244140625,0.60009765625,0.68115234375,0.599853515625,0.0078125,0.609375,0.984375,0.599853515625,0.0625,0.625,1.0 |
|
92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49755859375,0.6015625,0.6748046875,0.599853515625,0.0234375,0.6171875,0.9921875,0.599853515625,0.0625,0.625,1.0 |
|
93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.52587890625,0.60009765625,0.6708984375,0.599853515625,0.1015625,0.609375,0.9921875,0.599853515625,0.0,0.625,1.0 |
|
94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.33837890625,0.6083984375,0.80615234375,0.599853515625,0.046875,0.625,0.9296875,0.599853515625,0.0,0.625,1.0 |
|
95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52001953125,0.6015625,0.6650390625,0.599853515625,0.03125,0.609375,0.8125,0.599853515625,0.0625,0.625,1.0 |
|
96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.53076171875,0.6005859375,0.65478515625,0.599853515625,0.109375,0.609375,0.8203125,0.599853515625,0.0625,0.625,1.0 |
|
97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.474609375,0.60107421875,0.72216796875,0.5999581813812256,0.0,0.609375,0.9765625,0.5999581813812256,0.0625,0.625,1.0 |
|
98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.49169921875,0.60107421875,0.68505859375,0.599853515625,0.015625,0.6171875,0.984375,0.599853515625,0.0625,0.625,1.0 |
|
99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4931640625,0.6005859375,0.67333984375,0.599853515625,0.015625,0.6171875,0.984375,0.599853515625,0.0625,0.625,1.0 |
|
100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4736328125,0.60107421875,0.69775390625,0.599853515625,0.03125,0.6171875,0.9921875,0.599853515625,0.0625,0.625,1.0 |
|
101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.36328125,0.61328125,0.8388671875,0.599853515625,0.0234375,0.609375,0.96875,0.599853515625,0.0,0.625,1.0 |
|
102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5146484375,0.60107421875,0.66650390625,0.599853515625,0.03125,0.6171875,0.8359375,0.599853515625,0.0,0.625,1.0 |
|
103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.525390625,0.6005859375,0.6591796875,0.599853515625,0.1015625,0.609375,0.8359375,0.599853515625,0.0,0.625,1.0 |
|
104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4267578125,0.6015625,0.71484375,0.5999581813812256,0.0390625,0.6171875,0.9765625,0.5999581813812256,0.0,0.625,1.0 |
|
105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.49853515625,0.6015625,0.6767578125,0.599853515625,0.0078125,0.6171875,0.984375,0.599853515625,0.0625,0.625,1.0 |
|
106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.5009765625,0.6005859375,0.6708984375,0.599853515625,0.015625,0.625,0.984375,0.599853515625,0.0625,0.625,1.0 |
|
107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48291015625,0.6015625,0.6767578125,0.599853515625,0.03125,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.32373046875,0.58447265625,0.9599609375,0.599853515625,0.046875,0.6015625,1.0,0.599853515625,0.0,0.625,1.0 |
|
109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51171875,0.60107421875,0.6640625,0.599853515625,0.015625,0.6171875,0.84375,0.599853515625,0.0,0.625,1.0 |
|
110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5244140625,0.6005859375,0.66259765625,0.599853515625,0.0703125,0.609375,0.84375,0.599853515625,0.0,0.625,1.0 |
|
111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4580078125,0.60009765625,0.72412109375,0.5999581813812256,0.0390625,0.609375,0.9921875,0.5999581813812256,0.0,0.625,1.0 |
|
112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4853515625,0.60205078125,0.6884765625,0.599853515625,0.015625,0.625,0.9609375,0.599853515625,0.0,0.625,1.0 |
|
113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.490234375,0.60205078125,0.68212890625,0.599853515625,0.0,0.625,0.96875,0.599853515625,0.0625,0.625,1.0 |
|
114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46240234375,0.6015625,0.6962890625,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2744140625,0.5703125,0.99462890625,0.599853515625,0.0390625,0.5859375,1.0,0.599853515625,0.0,0.5625,1.0 |
|
116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51416015625,0.60107421875,0.669921875,0.599853515625,0.0,0.6171875,0.8671875,0.599853515625,0.0,0.625,1.0 |
|
117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51953125,0.6005859375,0.65869140625,0.599853515625,0.046875,0.609375,0.875,0.599853515625,0.0,0.625,1.0 |
|
118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4609375,0.60107421875,0.72607421875,0.5999581813812256,0.015625,0.609375,0.9765625,0.5999581813812256,0.0625,0.625,1.0 |
|
119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.49853515625,0.6015625,0.67626953125,0.599853515625,0.015625,0.625,0.890625,0.599853515625,0.0625,0.625,1.0 |
|
120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49365234375,0.6015625,0.68115234375,0.599853515625,0.015625,0.6328125,0.953125,0.599853515625,0.0625,0.625,1.0 |
|
121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49560546875,0.6005859375,0.6728515625,0.599853515625,0.0859375,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.31689453125,0.564453125,0.99560546875,0.599853515625,0.0625,0.578125,1.0,0.599853515625,0.0,0.5625,1.0 |
|
123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5126953125,0.6005859375,0.662109375,0.599853515625,0.0234375,0.6171875,0.875,0.599853515625,0.0,0.625,1.0 |
|
124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51904296875,0.6005859375,0.66455078125,0.599853515625,0.046875,0.609375,0.8828125,0.599853515625,0.0,0.625,1.0 |
|
125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48291015625,0.60009765625,0.70849609375,0.5999581813812256,0.03125,0.6171875,0.9921875,0.5999581813812256,0.0625,0.625,1.0 |
|
126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.49853515625,0.60107421875,0.68798828125,0.599853515625,0.0234375,0.625,0.96875,0.599853515625,0.0625,0.625,1.0 |
|
127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4912109375,0.60107421875,0.70361328125,0.599853515625,0.0078125,0.6328125,0.96875,0.599853515625,0.0625,0.625,1.0 |
|
128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46630859375,0.6015625,0.69775390625,0.599853515625,0.03125,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.23779296875,0.55126953125,1.0,0.599853515625,0.0234375,0.5703125,1.0,0.599853515625,0.0,0.5625,1.0 |
|
130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51123046875,0.60107421875,0.6640625,0.599853515625,0.015625,0.6171875,0.859375,0.599853515625,0.0,0.625,1.0 |
|
131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52099609375,0.6005859375,0.66552734375,0.599853515625,0.0625,0.609375,0.9296875,0.599853515625,0.0,0.625,1.0 |
|
132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.46923828125,0.60009765625,0.70703125,0.5999581813812256,0.015625,0.6171875,0.984375,0.5999581813812256,0.0,0.625,1.0 |
|
133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48486328125,0.60205078125,0.6884765625,0.599853515625,0.015625,0.625,0.8671875,0.599853515625,0.0,0.625,1.0 |
|
134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4775390625,0.60205078125,0.71533203125,0.599853515625,0.0078125,0.6328125,0.9609375,0.599853515625,0.0625,0.625,1.0 |
|
135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4912109375,0.6015625,0.697265625,0.599853515625,0.03125,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.27978515625,0.4970703125,1.0,0.599853515625,0.0234375,0.5390625,1.0,0.599853515625,0.0,0.5625,1.0 |
|
137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51171875,0.60107421875,0.66162109375,0.599853515625,0.015625,0.609375,0.875,0.599853515625,0.0,0.625,1.0 |
|
138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5244140625,0.6005859375,0.66064453125,0.599853515625,0.0703125,0.609375,0.9140625,0.599853515625,0.0,0.625,1.0 |
|
139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.46533203125,0.6005859375,0.71630859375,0.5999581813812256,0.015625,0.625,0.96875,0.5999581813812256,0.0,0.625,1.0 |
|
140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.478515625,0.6015625,0.69287109375,0.599853515625,0.015625,0.625,0.875,0.599853515625,0.0625,0.625,1.0 |
|
141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48095703125,0.60205078125,0.693359375,0.599853515625,0.0078125,0.6328125,0.9609375,0.599853515625,0.0625,0.625,1.0 |
|
142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46875,0.60107421875,0.705078125,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.28125,0.56591796875,0.99365234375,0.599853515625,0.03125,0.5625,1.0,0.599853515625,0.0,0.5625,1.0 |
|
144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.50927734375,0.60107421875,0.6630859375,0.599853515625,0.015625,0.6171875,0.875,0.599853515625,0.0,0.625,1.0 |
|
145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51953125,0.6005859375,0.66064453125,0.599853515625,0.0625,0.609375,0.9296875,0.599853515625,0.0,0.625,1.0 |
|
146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48193359375,0.6005859375,0.71826171875,0.5999581813812256,0.015625,0.625,0.9609375,0.5999581813812256,0.0625,0.625,1.0 |
|
147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48388671875,0.60205078125,0.685546875,0.599853515625,0.0078125,0.625,0.8671875,0.599853515625,0.0625,0.625,1.0 |
|
148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48583984375,0.60205078125,0.68994140625,0.599853515625,0.0,0.6328125,0.8984375,0.599853515625,0.0625,0.625,1.0 |
|
149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47705078125,0.60107421875,0.68701171875,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.3125,0.564453125,1.0,0.599853515625,0.0078125,0.578125,1.0,0.599853515625,0.0,0.5625,1.0 |
|
151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.513671875,0.6005859375,0.66748046875,0.599853515625,0.0,0.6171875,0.8984375,0.599853515625,0.0,0.625,1.0 |
|
152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51171875,0.6005859375,0.65966796875,0.599853515625,0.0625,0.609375,0.9453125,0.599853515625,0.0,0.625,1.0 |
|
153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.44482421875,0.60107421875,0.708984375,0.5999581813812256,0.0234375,0.625,0.953125,0.5999581813812256,0.0625,0.625,1.0 |
|
154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.47705078125,0.60107421875,0.6943359375,0.599853515625,0.0078125,0.625,0.859375,0.599853515625,0.0,0.625,1.0 |
|
155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4873046875,0.6015625,0.69482421875,0.599853515625,0.0078125,0.6328125,0.90625,0.599853515625,0.0625,0.625,1.0 |
|
156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48681640625,0.60107421875,0.70263671875,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.263671875,0.525390625,1.0,0.599853515625,0.03125,0.546875,1.0,0.599853515625,0.0,0.5625,1.0 |
|
158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.50927734375,0.60107421875,0.66064453125,0.599853515625,0.015625,0.609375,0.8671875,0.599853515625,0.0,0.625,1.0 |
|
159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.50927734375,0.6005859375,0.66015625,0.599853515625,0.046875,0.609375,0.9375,0.599853515625,0.0,0.625,1.0 |
|
160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.462890625,0.60107421875,0.71484375,0.5999581813812256,0.015625,0.6328125,0.9375,0.5999581813812256,0.0,0.625,1.0 |
|
161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.47705078125,0.60205078125,0.69140625,0.599853515625,0.0,0.625,0.8671875,0.599853515625,0.0,0.625,1.0 |
|
162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4775390625,0.6015625,0.69287109375,0.599853515625,0.0,0.6328125,0.9140625,0.599853515625,0.0625,0.625,1.0 |
|
163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4912109375,0.60107421875,0.69140625,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.3017578125,0.56884765625,0.99951171875,0.599853515625,0.015625,0.578125,1.0,0.599853515625,0.0,0.5625,1.0 |
|
165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.513671875,0.60107421875,0.66015625,0.599853515625,0.0078125,0.609375,0.875,0.599853515625,0.0,0.625,1.0 |
|
166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5146484375,0.60107421875,0.662109375,0.599853515625,0.046875,0.609375,0.921875,0.599853515625,0.0,0.625,1.0 |
|
167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.46044921875,0.60205078125,0.708984375,0.5999581813812256,0.0078125,0.6328125,0.9140625,0.5999581813812256,0.0,0.625,1.0 |
|
168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4755859375,0.6015625,0.685546875,0.599853515625,0.0078125,0.6328125,0.859375,0.599853515625,0.0625,0.625,1.0 |
|
169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4912109375,0.60205078125,0.7080078125,0.599853515625,0.0,0.6328125,0.890625,0.599853515625,0.0625,0.625,1.0 |
|
170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.474609375,0.6015625,0.705078125,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.26171875,0.55029296875,1.0,0.599853515625,0.0234375,0.5625,1.0,0.599853515625,0.0,0.5625,1.0 |
|
172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.515625,0.6005859375,0.6640625,0.599853515625,0.0078125,0.609375,0.890625,0.599853515625,0.0,0.625,1.0 |
|
173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52392578125,0.60107421875,0.65966796875,0.599853515625,0.046875,0.609375,0.9296875,0.599853515625,0.0625,0.625,1.0 |
|
174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4423828125,0.60205078125,0.71435546875,0.5999581813812256,0.0,0.640625,0.9140625,0.5999581813812256,0.0625,0.625,1.0 |
|
175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.49365234375,0.60107421875,0.69921875,0.599853515625,0.0,0.6328125,0.8671875,0.599853515625,0.0625,0.625,1.0 |
|
176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4794921875,0.60205078125,0.72607421875,0.599853515625,0.0,0.6328125,0.8984375,0.599853515625,0.0,0.625,1.0 |
|
177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46337890625,0.6005859375,0.69482421875,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.125,0.625,1.0 |
|
178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2294921875,0.62451171875,1.0,0.599853515625,0.0234375,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52197265625,0.60107421875,0.6591796875,0.599853515625,0.015625,0.609375,0.8984375,0.599853515625,0.0,0.625,1.0 |
|
180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52099609375,0.60107421875,0.658203125,0.599853515625,0.046875,0.609375,0.90625,0.599853515625,0.0625,0.625,1.0 |
|
181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.46484375,0.60205078125,0.72265625,0.5999581813812256,0.0078125,0.640625,0.90625,0.5999581813812256,0.0,0.625,1.0 |
|
182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4833984375,0.6015625,0.68408203125,0.599853515625,0.0,0.6328125,0.890625,0.599853515625,0.0625,0.625,1.0 |
|
183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46728515625,0.60205078125,0.71142578125,0.599853515625,0.0,0.6328125,0.90625,0.599853515625,0.0625,0.625,1.0 |
|
184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.45947265625,0.6015625,0.6875,0.599853515625,0.015625,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2255859375,0.541015625,1.0,0.599853515625,0.015625,0.5390625,1.0,0.599853515625,0.0,0.5625,1.0 |
|
186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5185546875,0.60107421875,0.66650390625,0.599853515625,0.0078125,0.609375,0.8984375,0.599853515625,0.0,0.625,1.0 |
|
187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51806640625,0.60107421875,0.6591796875,0.599853515625,0.03125,0.609375,0.921875,0.599853515625,0.0,0.625,1.0 |
|
188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4052734375,0.60107421875,0.7265625,0.5999581813812256,0.0078125,0.6484375,0.921875,0.5999581813812256,0.0,0.625,1.0 |
|
189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48291015625,0.6015625,0.69677734375,0.599853515625,0.0,0.6328125,1.0,0.599853515625,0.0625,0.625,1.0 |
|
190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48486328125,0.60205078125,0.71630859375,0.599853515625,0.0,0.6328125,1.0,0.599853515625,0.0625,0.625,1.0 |
|
191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47705078125,0.60205078125,0.70458984375,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.18310546875,0.5771484375,1.0,0.599853515625,0.0078125,0.578125,1.0,0.599853515625,0.0,0.5625,1.0 |
|
193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5126953125,0.60107421875,0.6630859375,0.599853515625,0.015625,0.609375,0.8828125,0.599853515625,0.0,0.625,1.0 |
|
194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51708984375,0.6015625,0.66552734375,0.599853515625,0.046875,0.609375,0.8984375,0.599853515625,0.0,0.625,1.0 |
|
195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.44091796875,0.60107421875,0.72412109375,0.5999581813812256,0.0,0.65625,0.921875,0.5999581813812256,0.0,0.625,1.0 |
|
196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4794921875,0.6015625,0.693359375,0.599853515625,0.0,0.6328125,0.875,0.599853515625,0.0,0.625,1.0 |
|
197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47607421875,0.6015625,0.6923828125,0.599853515625,0.0,0.6328125,0.90625,0.599853515625,0.0625,0.625,1.0 |
|
198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49365234375,0.6005859375,0.6943359375,0.599853515625,0.0234375,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2392578125,0.587890625,0.9990234375,0.599853515625,0.03125,0.6015625,1.0,0.599853515625,0.0,0.625,1.0 |
|
200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.6015625,0.6767578125,0.599853515625,0.015625,0.6171875,0.859375,0.599853515625,0.0625,0.625,1.0 |
|
201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.6015625,0.66259765625,0.599853515625,0.03125,0.609375,0.859375,0.599853515625,0.0625,0.625,1.0 |
|
202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.41943359375,0.60302734375,0.74169921875,0.5999581813812256,0.0078125,0.6640625,0.9296875,0.5999581813812256,0.0625,0.625,1.0 |
|
203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48681640625,0.6015625,0.69775390625,0.599853515625,0.0,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4970703125,0.60205078125,0.69384765625,0.599853515625,0.0,0.625,1.0,0.599853515625,0.125,0.625,1.0 |
|
205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.5,0.60107421875,0.68310546875,0.599853515625,0.015625,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.134765625,0.57470703125,0.9921875,0.599853515625,0.015625,0.5703125,1.0,0.599853515625,0.0,0.625,1.0 |
|
207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51025390625,0.6015625,0.66943359375,0.599853515625,0.0078125,0.6171875,0.8828125,0.599853515625,0.0,0.625,1.0 |
|
208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5029296875,0.6015625,0.66748046875,0.599853515625,0.015625,0.6171875,0.8671875,0.599853515625,0.0625,0.625,1.0 |
|
209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.44189453125,0.603515625,0.73583984375,0.5999581813812256,0.0,0.6640625,0.921875,0.5999581813812256,0.0,0.625,1.0 |
|
210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.45263671875,0.60107421875,0.69091796875,0.599853515625,0.0,0.6328125,0.8984375,0.599853515625,0.0625,0.625,1.0 |
|
211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47412109375,0.60107421875,0.69677734375,0.599853515625,0.0,0.640625,0.921875,0.599853515625,0.0625,0.625,1.0 |
|
212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4755859375,0.6015625,0.69384765625,0.599853515625,0.0234375,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.23876953125,0.5595703125,0.99462890625,0.599853515625,0.0546875,0.5546875,1.0,0.599853515625,0.0,0.5625,1.0 |
|
214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.60107421875,0.67041015625,0.599853515625,0.015625,0.6171875,0.9609375,0.599853515625,0.0,0.625,1.0 |
|
215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.60107421875,0.67138671875,0.599853515625,0.03125,0.6171875,0.953125,0.599853515625,0.0625,0.625,1.0 |
|
216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.408203125,0.6025390625,0.7666015625,0.5999581813812256,0.0,0.671875,0.96875,0.5999581813812256,0.0,0.625,1.0 |
|
217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.47607421875,0.60009765625,0.6943359375,0.599853515625,0.0,0.625,1.0,0.599853515625,0.0625,0.625,1.0 |
|
218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4775390625,0.60107421875,0.6884765625,0.599853515625,0.0078125,0.625,0.9765625,0.599853515625,0.0625,0.625,1.0 |
|
219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47412109375,0.60009765625,0.69873046875,0.599853515625,0.0078125,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0 |
|
220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.10791015625,0.603515625,1.0,0.599853515625,0.0,0.609375,1.0,0.599853515625,0.0,0.625,1.0 |
|
221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51220703125,0.60009765625,0.67138671875,0.599853515625,0.0703125,0.6171875,0.9609375,0.599853515625,0.0625,0.625,1.0 |
|
222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51513671875,0.60009765625,0.67529296875,0.599853515625,0.0703125,0.6171875,0.9296875,0.599853515625,0.0,0.625,1.0 |
|
223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.37353515625,0.59912109375,0.77978515625,0.5999581813812256,0.0,0.6953125,1.0,0.5999581813812256,0.0,0.625,1.0 |
|
224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 |
|
|