{ "metadata": { "ParamSize": 325, "ParamBytes": 4141007104.0, "BitsPerParam": 4.070121093571116 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262672384, "records": [ { "name": "lm_head.q_weight", "shape": [ 128258, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262672384, "byteOffset": 0 } ], "md5sum": "ec0ca2851615084283d1736f30a6db82" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c042f4edf5d58fe28fcc5dadd224578a" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ef6fa8940374fe644908f19e90d8458c" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 262672384, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128258, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262672384, "byteOffset": 0 } ], "md5sum": "a579a063407189a057a32ee8b074bd6c" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "46e6c311e305072c41142036da6b6d6a" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cd0904a029480dac893846a0987eff82" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 21963008, "records": [ { "name": "lm_head.q_scale", "shape": [ 128258, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8208512, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8208512 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 8216704 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 9134208 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10969216 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10977408 }, { "name": "model.embed_tokens.q_scale", "shape": [ 128258, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8208512, "byteOffset": 10985600 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19194112 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 19202304 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 20119808 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21954816 } ], "md5sum": "454e7de6845cd0449f0c0571cc96f37e" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bf3b178a18260a4ae7cd6490b16852c4" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c3177083722c1f3b7c54b86ed740922d" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "35df3193448ed1573f3cfbf3e18608ea" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "69e18821591edf2d80105709cfe414c2" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "31c46238a7883de9bdf8e2d42c8e784f" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "6f4a89ec5bbd98c8b924ee578e2abcea" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "15a3ab03c5b3c07e87850a080ca3fba9" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2f3ec5bc7c0267de1b80c9d591474f23" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "88cca267bac0175382a45694a357557f" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cac70d3b5ef45ef21050dce6e706cef2" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "28db7ded0ee7e56fb7c884fa1a34e674" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "3c91be453f2e76f23ac825e5eb64531c" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0d8b4841d65edb15cf48a3bbbc39488a" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "05808ce15a75d6af1d58f133115b4d96" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "52f9868951453ba10eba569f24204679" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2086368ad7dbf551e35868612deecc7f" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fa4031273ce2f468013e563b63c69ebc" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "1c2c8eab5eb40e2f9586b7cd5a29eab8" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a376019618acae7233282312a7d064dc" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fdc6e766b0e7b828dbc13392bf20c35a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "e64fa0e31a1b72d2c72a68444d7f7e0f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "30b22d71dfce18a4ba73538206c83cb9" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f9df77f012bbf04233b3ecf2ca940e6c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 24379392, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21626880 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22544384 } ], "md5sum": "294e261d0c36df6acec33f8bf0e1eb64" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3b2d458b019b9230adfbb5a46ddd4edc" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e98e4ec5d96693e01c34ee6a0e428ac0" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "09865536ba22d89519a3948e85d8590f" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "35c3f02201bee8f3b215be59e44a551b" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "23ce15cbf03ec1591ccc9747a148600c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "1ad620d05c0d6f303a5c248631e19824" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4bca1a257935c6495dc51e50349bd992" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0cfe32836d0117a6a5b5c7007359a770" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "ae842b71904da1658e82feb33b7d1a02" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d80a1d32d70605d72e12cf42b14fc0cd" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "14999a5c62e8767971c2133d8eaa2477" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "099d62745411e2eda141b660e03e178a" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5f81e0cd92f843a4f67c4729a38cb894" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "490293077acf6561686ae27568364553" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "340b039cf8df4fbfdff1c3a6aa7210d7" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5cc781dcbcae4bb06c8d7fc68efa41d6" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "48e7689c28995a374d8326c3e97d97c5" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "76a0b1345c2cb76137ea2d30f8b7ea5e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6345031c28dd2582dca3335ce199a3e5" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bc44bb19f666ac1a8b614378280bddf7" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "040de83ef85c433117f9ebdb79bac34b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1697a6d7a48c6b7915f632ae1851ec21" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f5efd17f727481b1e3ee8f439e4b60b8" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "02773d86ede6db593745c03fabc60afe" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "17db022c0764dd17a688695e19e02993" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6cc5e8a2e516cf1c0b8ce568a15d8eb2" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "26ec57a65d574ba566ec0bba665e7dee" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1441f61a3e20bff3546a12bf30a6ddc2" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ab52da15f9e266a493376d02fc6bb914" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "1da87f822781a87f60ae240eeb4bda89" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d91d6f8fc08be66b9ef55f49d8fb520a" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a0e4ef476d600ea0dbbf404b6b276f82" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 24412160, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21635072 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21643264 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21651456 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22568960 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24403968 } ], "md5sum": "fddad52eb794e94586a383832be93986" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c76174e74f46b049863dfa133edbd9bc" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fcfb2c4ab39a2c627d2ca1cd2bebaa0f" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "00261b5a3ae50cfecbffc394672f0d5c" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "187ff600adba4dd47f65ecd05a231c6b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "81d67262e2b9436c2eb2dc67b85745f3" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "565d3f28f05ad0d27b057a79b2d07779" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3c02c13d56dacb961464645114db4ae7" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3d744911a199075ef5200f8748f24c3f" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "bce3229aad11df6c51c30030f7b7b8ee" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b38eed833970af76572d0d94cdee16aa" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b78243daa5f62cacfe4653fa4e49fe8b" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "8257a33cce5f6a6648052c21202fea4f" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "844ae8741475491a64f21ac9294a2fb4" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "79fe55a42bced6aed55abf05d38c213c" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "87bbaa99746ce2063ea5743a64f00e31" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "17e26027b4471b9e581e09a01d117720" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1c874c811c27b6ddef0a1f0993e6557e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "e8fc5496bb14122cb9af43121b033f57" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6f03a14448ce917b64f978507e8d2fc3" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4d51b9d49286f8681b451fe8305371c0" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "da33a29964a1686bf0bf8ff27d243743" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c588715040bd5152a3b3c1a24a5171dc" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7f14a169a5339b8507817abfd1bc97b1" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "a336001a3e3a74bd833e20c9dffc0fab" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5bca7c2546f2af2f3ca874911b70ca2b" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "38702bb10db47de19d9528b3d8d0cf55" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "36a29153303d65b72f47576bc576b80d" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e7ff73327a173aeb3d2d5245c0df89f1" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3f7077589f93e21a0ee0e83fa353dfbb" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "f269ca2a5e42e1e47e589b64c452ae63" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d6560006fef0bff634e3f54a496c6489" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2217f544aaea482000a68162e799dfb9" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "d0d5ed2406a52bd3f8f0e5b988ed9d86" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 21626880, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 } ], "md5sum": "89695b75341670d0eb287decf0b62e39" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 21626880, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 } ], "md5sum": "53f0b0e73b2a296dc2c0f304b692f661" } ] }