{ "metadata": { "ParamSize": 313, "ParamBytes": 3879214080.0, "BitsPerParam": 4.07500989461587 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 272498688, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 272498688, "byteOffset": 0 } ], "md5sum": "e31688f707f2f116124d87be9c5b2f79" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 34062336, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 34062336, "byteOffset": 0 } ], "md5sum": "f15e1c34f09c298ff581a537ff66ecab" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "65db05fabb9eca3bc5cea7916f74c85a" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "ede83f5af023d4b21c96c42de4e21975" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "af2eabf0c47eee43ed1c30c6552bbaaf" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "e166556c756223f3bd49df9f4e0fd3ff" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29615104, "records": [ { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 9216 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8266752 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8275968 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14698496 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14705664 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14781440 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14788608 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14795776 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14802944 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14812160 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23069696 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23078912 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29501440 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29508608 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29584384 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29591552 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29598720 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29605888 } ], "md5sum": "092513153d87308185451fb37fab4dda" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "de19625518d6e29701db2a6452295ea8" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "dec90d137736beb8dc5c172ad3338432" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "bfe3c542bdd5d61dd007069afed49094" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "b608783c27572c2273f4609d58bd68c5" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "32a02cefac996ddf90675f4478e806c2" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "716abb5b6d7b23e2b863fd7d0c95fc47" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "04d0ef721626790769840bf87fef7e89" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "ff6d4f35f2a45eed453582d4a9c4917c" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "2e6289f55a037e98a471a5fa621e55bf" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "6f3abb2b123faa9bf04caaccd0416f57" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "6cf72872b717e6c757b9ff56c018b6d8" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "3c5fe62a625511dfa341859d08be9a67" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "0f3179e252d3f311550876eb8c706fd7" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "1ce677fa5f7c83f487e81880c4580ccd" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "30a5369c0ceef0bfb8e6a496c4dc721c" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "b69dbdc38c1df85267c3a2f2b65436de" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "2563ccb5d422ab813c9de962e43f39c4" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "a07e849b915870bee2b6b39fb44c32dd" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "92ab1ac879c451e1f076a1a854436b0c" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "e8358caccd37a829ce4fdd1deeaedefd" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "aa76473a203925b55099d9e372c73fb2" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "d11f78694076073fc014816b2f939c6f" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "30a9be6bdb771cd1b187e4954741f2eb" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "3f60eee1e540d1f6191047224da0e6f4" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "7a00911114978ec23728f32f5e89aec8" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "1a73eb9f7853af728afebbc661e190dd" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "93b530cf5a1091e9eef9bccb358d91a3" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "9483359407807277a0eaf4b31cee0bbc" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "9005490447085c048f658381b8e1e57f" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "f82e09d780ea6b775db9b61f1061798e" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "64634f86183b5bd65b6eaba9716d6574" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "93d04b6431aa8d9b30dfa9164600af14" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "68dfb660df2e61439d1c3beedf1d7893" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "f0852affa421d456c9d3c58a5a096f6f" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "068663cd94c9f59bda7a7c8fce55398d" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "798610b35a736af6b8894f0492d490c1" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "75e811b87b02a9a91cee92e8bbfde0f5" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "6e78f92056c9a85dbf00d174984f1929" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "aba695a6cf161a081b97142e603e0aa4" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "0b5ed982bdd3da65a59d1f2770e043c9" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "7a7cc969c7df278b04101f0006a57161" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "137f7fcef38d4b6124a433b5ccbcd3ce" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "6c791ae014115ed7c82c754d3e1c6f96" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "85774908f7a0f47dcbe30ed842acd12d" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "430056cffd91c87a73f953b3ee219089" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "73d0c51189946585febeb874e69ae8f9" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "16e3bf38802257a1afea51a3dcc81a70" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "4e101099f3cd9013ce1efe57f648b943" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "83df3f9f61d977d03260053dfa8158b4" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "daeaeb7c3100d6124106196e8a04db77" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "840435f3a78f7994d104c1eb1f7d8010" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "fb48c038ef825b044ac2ab0ad2c0a673" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "c52a17b91b3ab39e31531744e93ed53f" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "090c7b7b3358e4e112c4101d05641c95" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "420645d630c18cd06e523cb3bef9865b" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "42d512f8568e6f41e56bf74e9b5bb620" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "d39f6ff6e8aa864a22052f2a6981deee" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "d5958f63fc11c2aed977e16fa2a64b9b" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "6c090b3947e1f882e9cfb8429420dc9c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29605888, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 29596672 } ], "md5sum": "6ee7234308b97a59e36dc43efb577dad" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "e0a40ff53b83009d749803058b4cda78" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "87aa58847d9a6d4b48124bfffc9da0b5" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 3584, 18944 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "6633a114e92a018ba7eb2e0e5ca821ad" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 18944, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "7c394d2eebdc11d6d5322f23fdf04131" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 272498688, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 272498688, "byteOffset": 0 } ], "md5sum": "99b263b1d276d84a055d7219935c6fbf" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 34062336, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 34062336, "byteOffset": 0 } ], "md5sum": "5c2258bb0a45ba1f907fff20c5703df9" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 29603840, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 8257536 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 8266752 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14689280 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 14696448 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14772224 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14779392 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 14786560 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 14793728 }, { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 3584, 2304 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 14802944 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 1, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 23060480 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 23069696 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29492224 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 1, 37888 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75776, "byteOffset": 29499392 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 1, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29575168 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29582336 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29589504 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 29596672 } ], "md5sum": "ff32f12c81ec50a8a34b853aeac5ae8c" } ] }