diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4183 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 2149644288.0, + "BitsPerParam": 4.500600961055312 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 49250304, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32064, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 49250304, + "byteOffset": 0 + } + ], + "md5sum": "60cb49fc9e15a1a503a8557939c9acbf" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.21.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "67eab4fcf9cbd1606df780ac17e8175b" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 23470080, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32064, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6156288, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 6156288 + }, + { + "name": "transformer.h.21.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 6162432 + }, + { + "name": "transformer.h.21.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 18745344 + }, + { + "name": "transformer.h.21.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 20318208 + }, + { + "name": "transformer.h.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23463936 + } + ], + "md5sum": "cbecde75bc0db08c55cfd2eed4f324f4" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.22.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e8ceff6f44e26265e9f6fc92d09b9159" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.21.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.22.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.22.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.22.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.22.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "eb4d887bb534c97c5acb188b28056e54" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.22.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.22.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.22.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.22.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.23.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "2a0028965aa9bfa3d141b881e1c0a324" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.23.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6383b04e4d19378c72106ee386120f35" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.23.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.23.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.23.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.23.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "78bd8aff5d975108f094e99b7ff6eda9" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.24.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1e764fd4a60ed88577f9e9b77bc08619" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.23.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.24.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.24.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.24.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.24.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "00faa3c4a431597fd86d90a3a9f72274" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.24.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.24.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.24.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.24.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.25.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "6c92b61b8fffada7c9ab35f57938fff1" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.25.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1a85d0b2174d6745ae44b944b90e7bf1" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.25.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.25.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.25.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.25.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.25.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "9060d56da7fa65f6af71fa488be422ba" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.26.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1c5dccd984a028491ca2f98fbb5599a4" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.25.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.25.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.26.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.26.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.26.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.26.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "325453b9bf2fe32e6f0b1e947a74db18" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.26.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.26.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.26.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.26.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.27.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "64902b5098e08e8afedc553f87d03cab" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.27.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5fc58acf1755c56a959878dcfe50573f" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.27.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.27.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.27.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.27.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.27.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "f3f7f8a759eafcd68a0704e88d97f7c7" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.28.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d9c97dbff01268aea20cbbbddb2f8daf" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.27.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.27.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.28.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.28.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.28.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.28.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.28.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "f3c8f62749957c85a1ec17d6fffe296f" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.28.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.28.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.28.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.28.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.29.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "d3c67c05a3d189c9387eb5c00ebf657b" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.29.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2f419fe0426474a07abbf77beba33d22" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.29.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.29.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.29.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.29.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.29.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.29.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "73bb70054a46237091461c767a90f184" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.30.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e364e2b8c05055ffc4ddbd339888fa7b" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.29.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.29.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.30.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.30.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.30.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.30.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.30.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "53f088ac23e3f66f2f4fee944f2ef8e0" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.30.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.30.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.30.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.30.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.31.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "cecd96d40d8999f779817017f6a7627a" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.31.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bbc282217aa869e3b2b74ffba3f77ab6" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.31.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.31.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.31.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.31.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.31.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.31.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "6a60fb3029f5905554a23f2b2c450e29" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 49250304, + "records": [ + { + "name": "transformer.embd.q_weight", + "shape": [ + 32064, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 49250304, + "byteOffset": 0 + } + ], + "md5sum": "4fb941277e242c4b80b4962950cac0f6" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 22093824, + "records": [ + { + "name": "transformer.h.31.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.31.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.embd.q_scale", + "shape": [ + 32064, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6156288, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.0.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 22087680 + } + ], + "md5sum": "204e1b7b44795229dbd211cdd7c0c874" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.0.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "60284f4bbb9aa062a1cad7c88d450326" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.0.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.0.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.0.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.0.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "1d732626957bda9cd13a25d9002978fc" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.1.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8b8c80c26c413541b633545686c9ab80" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.0.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.1.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.1.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.1.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.1.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "f156bc8c39bafc7e8d062ac22641ff96" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.1.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.1.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.1.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.1.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.10.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "eb95dbaaf3d1ae5bf658084b4372fafc" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.10.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0d623bf57406c64bb861971099e3878a" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.10.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.10.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.10.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.10.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "b0b125d04e4cdd73f14f0f1b92cbe352" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.11.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8d91c01c2610dcdef0034b6d74b9f083" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.10.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.11.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.11.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.11.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.11.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "1277d25ffc04bd8db42bf50b29019276" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.11.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.11.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.11.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.11.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.12.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "ccdd0555ece64a13f91f0193ae115afe" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.12.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "71c6b534184163da0973884b68627ada" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.12.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.12.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.12.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.12.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "37bec1479f549bc4a4fce453359a0b18" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.13.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0a32234aa7f4f1720bf5b2ceda686e8f" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.12.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.13.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.13.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.13.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.13.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "38982e1ba4416a9c1a43fe23ef6a46b0" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.13.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.13.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.13.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.13.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.14.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "88066c8a3cc31f8da54d2dac1c888176" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.14.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "274fe6271f13e75198e06c1dd0f6fbcc" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.14.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.14.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.14.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.14.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "f011ae0479e1ae9cadd65487086b8ff2" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.15.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "cbf764399d66945ba9e7b6d2b9b747c8" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.14.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.15.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.15.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.15.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.15.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "2e08df9fd290a6a5f08c44c568ff529c" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.15.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.15.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.15.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.15.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.16.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "9b0f0ff3eb7b62dd9333d83086312a08" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.16.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "426658c8be2e813117593dc718f7ece1" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.16.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.16.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.16.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.16.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "a03418777dfac087a063e6e2b72d159b" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.17.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f18103d0f2dd174586525efeb592469d" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.16.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.17.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.17.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.17.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.17.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "915b83cad2a141e9a09b6e925de00a34" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.17.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.17.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.17.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.17.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.18.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "7ca33d293f0f08ae7b269aa04b0c4421" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.18.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1228ddf709245f77cb3699d8dc21cd47" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.18.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.18.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.18.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.18.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "88ba71c35188ba3a601bfc2b95df5801" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.19.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4229683ad46dd5f51cf576cf689ca2ce" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.18.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.19.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.19.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.19.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.19.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "c337a9b1bec4e93b592b0866fe0e782f" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.19.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.19.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.19.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.19.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.2.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "8e3e19f3e0e6aff075c743018630fced" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.2.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "71a8a1db3a46ba03bb2a1b225b1b7ba0" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.2.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.2.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.2.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.2.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "69d90a06c585ec0bf90785cfd72ef049" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.20.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9e17e219937c9a198aba2ecdfe11f938" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.2.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.20.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.20.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.20.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.20.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "79408e70bbac62546703ef028f4578de" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 26548224, + "records": [ + { + "name": "transformer.h.20.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.20.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.20.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.20.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.21.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21233664 + }, + { + "name": "transformer.h.21.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 25952256 + }, + { + "name": "transformer.h.3.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26542080 + } + ], + "md5sum": "f86ed1194a105ae629aef8e52287c590" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.3.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0f1e66e48a15b7fcbbd250503b0d1e4f" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.3.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.3.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.3.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.3.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "af9d5247cc55ec8f931dcc1d0b6fe183" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.4.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f2b9ee314c5c94d3c731d31656417841" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.3.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.4.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.4.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.4.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.4.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "997c2c628c50712b50d7f26c26084d3e" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.4.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.4.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.4.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.4.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.5.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "4143b000c6ea0317abf7ce367d965a1e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.5.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0aff6bb46fed4f8503a0fe2640f36af6" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.5.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.5.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.5.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.5.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "91127a278d13aff4d42c6f29f1f09d47" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.6.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b14b9a1ecc2bf637a18604ac9bf702c4" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.5.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.6.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.6.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.6.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.6.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "dce47da18f429ae05574459293e396b5" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.6.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.6.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.6.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.6.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.7.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "5856ef4af8d4312f507505180b614fa5" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.7.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4ae17cb79f6f2c8296adfcdeb202a4f0" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.7.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.7.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.7.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.7.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "42972fecba07f6be38542ce92ff3b551" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.8.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5094f00a0e50ae507b92f70a183b3284" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "transformer.h.7.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.8.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "transformer.h.8.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "transformer.h.8.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "transformer.h.8.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "transformer.h.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "a3ec0d681ba40d39e13f68678c81eb56" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "transformer.h.8.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "transformer.h.8.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "transformer.h.8.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "transformer.h.8.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "transformer.h.9.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "43bf093dde1a08e0f83d30439be9f5cb" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.9.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4a4a978152943f8d142761654e04fe71" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "transformer.h.9.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "transformer.h.9.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "transformer.h.9.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "transformer.h.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "transformer.h.9.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "transformer.h.9.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "3e5b484d31b787c98c58431f4cfc5427" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 15925248, + "records": [ + { + "name": "transformer.h.9.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "transformer.h.9.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + } + ], + "md5sum": "bd9301b52f682db4db7e91df8cefc931" + } + ] +} \ No newline at end of file