|
{ |
|
"metadata": { |
|
"ParamSize": 313, |
|
"ParamBytes": 3879214080.0, |
|
"BitsPerParam": 4.07500989461587 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 272498688, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
152064, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 272498688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e31688f707f2f116124d87be9c5b2f79" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 34062336, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
152064, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 34062336, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f15e1c34f09c298ff581a537ff66ecab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65db05fabb9eca3bc5cea7916f74c85a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ede83f5af023d4b21c96c42de4e21975" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "af2eabf0c47eee43ed1c30c6552bbaaf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e166556c756223f3bd49df9f4e0fd3ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29615104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 9216 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8275968 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14698496 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14705664 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14781440 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14788608 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14795776 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14812160 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23078912 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29501440 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29508608 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29584384 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29591552 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29598720 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29605888 |
|
} |
|
], |
|
"md5sum": "092513153d87308185451fb37fab4dda" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de19625518d6e29701db2a6452295ea8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dec90d137736beb8dc5c172ad3338432" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bfe3c542bdd5d61dd007069afed49094" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b608783c27572c2273f4609d58bd68c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "32a02cefac996ddf90675f4478e806c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "716abb5b6d7b23e2b863fd7d0c95fc47" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "04d0ef721626790769840bf87fef7e89" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ff6d4f35f2a45eed453582d4a9c4917c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e6289f55a037e98a471a5fa621e55bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "6f3abb2b123faa9bf04caaccd0416f57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6cf72872b717e6c757b9ff56c018b6d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c5fe62a625511dfa341859d08be9a67" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f3179e252d3f311550876eb8c706fd7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ce677fa5f7c83f487e81880c4580ccd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "30a5369c0ceef0bfb8e6a496c4dc721c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b69dbdc38c1df85267c3a2f2b65436de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2563ccb5d422ab813c9de962e43f39c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a07e849b915870bee2b6b39fb44c32dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "92ab1ac879c451e1f076a1a854436b0c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "e8358caccd37a829ce4fdd1deeaedefd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa76473a203925b55099d9e372c73fb2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d11f78694076073fc014816b2f939c6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "30a9be6bdb771cd1b187e4954741f2eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3f60eee1e540d1f6191047224da0e6f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "7a00911114978ec23728f32f5e89aec8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1a73eb9f7853af728afebbc661e190dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "93b530cf5a1091e9eef9bccb358d91a3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9483359407807277a0eaf4b31cee0bbc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9005490447085c048f658381b8e1e57f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "f82e09d780ea6b775db9b61f1061798e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "64634f86183b5bd65b6eaba9716d6574" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "93d04b6431aa8d9b30dfa9164600af14" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "68dfb660df2e61439d1c3beedf1d7893" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f0852affa421d456c9d3c58a5a096f6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "068663cd94c9f59bda7a7c8fce55398d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "798610b35a736af6b8894f0492d490c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "75e811b87b02a9a91cee92e8bbfde0f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6e78f92056c9a85dbf00d174984f1929" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aba695a6cf161a081b97142e603e0aa4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "0b5ed982bdd3da65a59d1f2770e043c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7a7cc969c7df278b04101f0006a57161" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "137f7fcef38d4b6124a433b5ccbcd3ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c791ae014115ed7c82c754d3e1c6f96" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "85774908f7a0f47dcbe30ed842acd12d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "430056cffd91c87a73f953b3ee219089" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73d0c51189946585febeb874e69ae8f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "16e3bf38802257a1afea51a3dcc81a70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4e101099f3cd9013ce1efe57f648b943" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "83df3f9f61d977d03260053dfa8158b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "daeaeb7c3100d6124106196e8a04db77" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "840435f3a78f7994d104c1eb1f7d8010" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fb48c038ef825b044ac2ab0ad2c0a673" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c52a17b91b3ab39e31531744e93ed53f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "090c7b7b3358e4e112c4101d05641c95" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "420645d630c18cd06e523cb3bef9865b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "42d512f8568e6f41e56bf74e9b5bb620" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d39f6ff6e8aa864a22052f2a6981deee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d5958f63fc11c2aed977e16fa2a64b9b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c090b3947e1f882e9cfb8429420dc9c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29605888, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "6ee7234308b97a59e36dc43efb577dad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0a40ff53b83009d749803058b4cda78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "87aa58847d9a6d4b48124bfffc9da0b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6633a114e92a018ba7eb2e0e5ca821ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
18944, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c394d2eebdc11d6d5322f23fdf04131" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 272498688, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
152064, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 272498688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "99b263b1d276d84a055d7219935c6fbf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 34062336, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
152064, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 34062336, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c2258bb0a45ba1f907fff20c5703df9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29603840, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 8266752 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14689280 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 14696448 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14772224 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14779392 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 14786560 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 14793728 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
3584, |
|
2304 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 14802944 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
1, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 23060480 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
1792 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 23069696 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29492224 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
37888 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75776, |
|
"byteOffset": 29499392 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29575168 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29582336 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29589504 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29596672 |
|
} |
|
], |
|
"md5sum": "ff32f12c81ec50a8a34b853aeac5ae8c" |
|
} |
|
] |
|
} |