diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4431 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3790746112.0, + "BitsPerParam": 4.500454373320414 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 65538048, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32001, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 65538048, + "byteOffset": 0 + } + ], + "md5sum": "0b3ebd70aca35d52e03c2d5018a8c969" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 30744832, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32001, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192256, + "byteOffset": 0 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8192256 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 8200448 + } + ], + "md5sum": "9c1bc360f0ecf22788589a67853fdc94" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "dabc232391cb802d5ae7225e191f2af5" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 28196864, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 2818048 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 2826240 + }, + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 2834432 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 25378816 + } + ], + "md5sum": "af0bdc8ae4dbb34ed9b6443616b25b7b" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 30810112, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 5636096 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 5644288 + } + ], + "md5sum": "fac5d884674e80561259a7af8498af6c" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "f3c03610dd66d0ecac94de161928e87a" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d55ce09904032b3e0e5b1db1f347aeae" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9c247ea57456c0bf42c2dc45b326cb50" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "f09e02b7c2a945c8ec9c75a792ec4992" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "b998ca4be1e85ca59447a157d57cdf34" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "3c4146adf4307c25cfdebf4cfdc638d7" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "2f63d727d22e6580e733c5091ef8657b" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "1e40b3a7d343b395e0214a074b2bca48" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "526fa2fc34fa154ccb90c3cbc9d375d1" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9853e78ed5658d555ae2ef7e2dee1471" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "29f4025521308e187a349323225d138a" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f3b4df025c50effe90ff82d7b2782d45" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9fd0051093d358059203e429380f5481" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "8bb87bd11542c42745ebf6eb6f0b161d" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "41ee252c58b75257414eeeac3541c896" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "a5ff5bb6c885549542a251a546b1cfd6" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7fe1f5947fb5bb379ec79ecab34ac8a7" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "bda9c0be12ec9d7df1da99fe958e08fe" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "868ff6271b69ae977d29dab70c5b0b68" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8e7173a68ba9cc5163089c5367c584ac" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "9a5bf48b8df43210b4b23352bb044c69" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "72c731a34eed54671ac7708f237f55da" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "b70b24cdf30bee449e39c555bf3d8b45" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c3088ccb7ed96882e320338472a41f56" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "45110eeed5ba47df174a762b81dbcf7a" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 65538048, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32001, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 65538048, + "byteOffset": 0 + } + ], + "md5sum": "8ed02b9f78e2e5592f90392d394bafcf" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 31801600, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32001, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192256, + "byteOffset": 1056768 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9249024 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9257216 + } + ], + "md5sum": "64e01f1ea8261899c565ad6360835981" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "aca70bdd5a543ee4405a02cfd78c5e85" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c0d17c957c83c1631e55889b3249e6ff" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "950eaffe8ce5c401c2b785f8189e183f" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "81649a612c582acac7fc7361aeb54a7c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1b4e3989738996821333ac9fb8e12684" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "b4d5b46de3a8412dfd786a339a135be2" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "22710b0e21eff23ea2864f7c0c6f52bd" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f71dd7dc8e8158bd06bd4ed009440007" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "daed3e318bf8c7086b4f02d7ce485ae8" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "cc986aed592d5db140336b3c7ee8e848" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f57a7ab19e23241e01f267d8428b0a5c" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29827072, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 21045248 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 26681344 + } + ], + "md5sum": "69f2d4853bb9dc6c17000fac0ec9699e" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "58efd9b9c02ea60f2927a91b6210aee5" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ede69d1ac98e239b39b69bf1f4755a00" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "66774010679fe3b4e37d8e55090aaffc" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "ab88a106ca1130094d27253c241b21c6" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "9a680e29dd6ae9de019b467bb8b5075b" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9682049cb8728e73c0d44b51e055d300" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "6e42a99181dce715434e9fe3d7e6a483" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "a265590f56bdde0992fa6bb71dc174d7" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "82a3799b6312b4331da0586b1152653a" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6fbf365acee1417d03f5793508fa6bde" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "2ddcb52d7820022b40f829eb1618f9ad" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "cbb3be1ae75782c2f42d44ce19fad1bb" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "245d5e7b4dfee527db104c1865b0e547" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "35d2b115a8422c007564e9bebbb5fcdb" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "6c88ea7a9858c21d5a8abf8412112682" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "e33f151d19b1a2cc04ba96d060b716b4" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5ceafadf2260c427417f94d76862e7e7" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "0f9e6f28731bc49ea73cc2d501e73b45" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "0e478c490776773d77fbb2fc15759525" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "deef0080d44f62d4921370d3ec358771" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "83b6db7746123148a40f82ad3ca45e4c" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "79a66aaa92f6e442ce2ca6f2980b6382" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "26523fa0b68ab48fc5df47bbc4621ca2" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b9227427bf37b54af600038d3406298f" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "6cd54d882465841f565ee7c509975184" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "9ab9a2ed40976e80905479a675320e9e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "25c9aa96de13018b29cd99977dc3c4f9" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "208cb28d3fc49c0523721e56bdd55ea8" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "0548debe814b50378235b0950a25fba0" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "35b302f56835854854b59180f5fdeb20" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 28196864, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 2818048 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 2826240 + }, + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 2834432 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 25378816 + } + ], + "md5sum": "0fd67bebd928493b132931ba9b2510a6" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 30810112, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 5636096 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 5644288 + } + ], + "md5sum": "26ae1730eb7d58c245392080a1a813c1" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "f7b6b5f1a56df4e75d666e4b66df900b" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "57299857b79cc2bd6855159876fdf8e8" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "55addba4025e13bb8a00baf008683b0f" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "911f9db742526e12af92ea8db69ae330" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ae90902a05f4a3bd4679c71a499953a1" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "672bc12b8de3e4b931c2135226a0cb08" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "48db3e3d83bf002877fc456578ce0d84" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "911d424186f615e605269e4efe092fb9" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "c120464ef04d29428b93298388d6f71c" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dbc27da3977b84a325d92d301386234f" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "313259244061058f92ed11da88027538" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "2f5ff300fbc97879b7cf65440d6ff809" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d79122042fb9e249e9953bea936e8aba" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "3f54dd363a6fcff28f8b61735722140e" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "bb7bfe44f6cb854416bf724bd0c8ef1a" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d87489d490395f7b2f91cecba207a874" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "77b4ffe5fbc320b16d9012361722bc83" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "524d9257497efc82bd3d220e1126b5dc" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "16678eec83b3928def8d390a63d16e0d" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0f0327ed1513c32817900802bffff7bb" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "81c675385269314d24a4d19e4fb56fb1" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "3a72bbf6844b7f1902b910c8c7427720" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "76b66684d45d065f3b844d2844c81bf0" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "74b4aff75aa8b52deebeb1e13ad59a67" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "2aba209ad880528cd6f4e7fa4b4530ed" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "04103804284580073d29a36873520a8b" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f71b53ad8aa0e45e1571127465c899b6" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "96183f3334045c81e7ddd5fa5babb459" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7e993afcfbe46bea9f5395c6e54dd0ab" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8e792e6c7e64b8302adb3dd65fc31997" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "627b9d2e53d49a76b02fb201614bdff6" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "0adb21d777bcf6d0460675c00bc151e7" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ea8b4b117e808c5c3c9ff38b71bb0250" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0b55df005f5f200c5bf9cf6e91ee168f" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "4d30991210103b5dd824e9696b0a4761" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "b25de963e65fd39c3fa7b28ab224f32d" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8f0e78c70b05356916e08bceb2f927a4" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 30801920, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 18219008 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 21364736 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29753344 + } + ], + "md5sum": "ee7447efe20ada46bf3bef3e300a5dd4" + } + ] +} \ No newline at end of file