{ "metadata": { "ParamSize": 283, "ParamBytes": 1656837216.0, "BitsPerParam": 4.069857945300451 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 197004288, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128258, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 197004288, "byteOffset": 0 } ], "md5sum": "bd0cb4d3ca0e4ef542c45618c24a48bc" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "da592b21f89f844049e7b93d38eb5c94" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32913504, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128258, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6156384, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6156384 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 6162528 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 18745440 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 19138656 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19925088 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19931232 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 27795552 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28041312 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 32759904 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32907360 } ], "md5sum": "60a9ce2c5ad19ffe69e0dd8856023b05" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "77e4a994a4190bd54f9917e366a0fd4d" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "5b80c18f231c4f9f19cdffa54db101ec" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "134961d5eeb283cda8cf87ab10419ca1" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "0f762dd1b12d4c751db8c78cca22b670" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "36fdeeb0824cac63a5bfbf6175404e24" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "b1771313446d4efc8bde4464a896fb7f" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "abfff04b4c3c577344c3580e85cfc925" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "e75f58b61f4f03ba0d236e87a2c21214" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "855924c9e67ec4263fcc63dacf2d4b31" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "321181dbf9166d9af1edbdc1d612b552" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "56e37722c422efe5f82b8d548e13f287" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "31a9b007b9ca3c7a4666191d57c973d1" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5fed9273a2bead9d11405149e0f1ce53" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "0e0f8b7a9edbecdf74b56dd731181488" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "393b2cc47e8ba270c6c3ab119bd6cdd9" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "7ccffdc5fc370eb0c56cfe28b9f4fbf2" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d05598471b687cc2d3461756cbf19ec3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "f1177002bca76b83c2ade0f0660bbcd8" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "eb389bd05d30df8880f5cf703243eb84" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "f6ea4673ade61d01761d78dcc55413ee" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "206c55197e9db8b003ce20476fe4b89f" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "6ec2e5f852c46473121c96371a1c8987" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "151852fdb932f25782e5ef31bcff6743" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1408adff0158e80111d7e48e5acd7f5a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 27531264, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 26744832 } ], "md5sum": "53e277b716aa4f27421d117b45568cfb" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fc5650e315c27f40bde49beabea9a881" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "48dd8553fa43acaf7f6ec74832f5f037" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4c2d8e0a4c60566ec21a2b3569c39b20" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "84db123c41d7b763e3d4ce5ab7f514cf" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6697e9f28b414d04e9c1bf5f0016316d" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "d166aeff2f06e6f5f3318c80e1e8e561" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7dc72902ea9a4561fc645b4cdc50e4f2" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "11c6ca5286fa280ab30ab1283a624f1d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4c32e454f96bd5b2014bcfb3732f9c8d" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "6b677278b8e90c0de8da9af401963df8" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "607e17122f8a32d3b75798fac6529a94" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "dc752eee7d99b7276fa53a1f69c13ad5" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "849f4554e75acb38029295ed4d2a621c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "5c57e1843966b4aff67a2e43d0a4830a" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25970688, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25958400 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25964544 } ], "md5sum": "99fd4ebb1c6f6adc1f0ccdbab21a8359" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5809908d76650a0208191156a5d1f809" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "c873aff8620613612b9ebfcca40f0839" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2579a3d7a3e8557e278331a7af2bec89" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "4cd57141c1caec219e2165b70d66c07a" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "08fb2ecdf458b2da31d0e74ab997c2ca" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "0db11dec314a29d7445cb448b1c7cee3" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5365fb6708e6debf7ff36f63e59848a3" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "1857a20b0f34591aa73b79d97691d44c" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "775252735690638db35d1d04d2593e36" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "b39a8f84a2b387efc08ae797ec358a0c" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6e381f308a749ad0455bb2d2bcd47d7d" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "c3b4c647ebaba7c00d768d814589aa45" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "eda0fc6be4ef81341fbb34ba16a59645" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "ad05de8721ce948788a3ff878b7fee14" } ] }