Qwen2-Math-7B-Instruct-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
819a90e verified
{
"metadata": {
"ParamSize": 199,
"ParamBytes": 15231233024.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1089994752,
"records": [
{
"name": "lm_head.weight",
"shape": [
152064,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1089994752,
"byteOffset": 0
}
],
"md5sum": "c2502ec6a41dc8973053839492aea942"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "675a4b1b6b16dae20eeccf1158f01adf"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "0c4526a83ae57f54f8d7f615d37b82f8"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "ddcbcc15562741002e9a5de64f443c29"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "e478c0a6d68c784da403672b92ef4da5"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "6e123e124e6d4fab6ccc82b39416918e"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "356b3a113f5120a3921b2263d9fca7a5"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "51a709407ba10121e9429de6e528c6ba"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "09d76009ba425b895251266943570243"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "9dc99bfad4b41a4cfc02510985bb7727"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "c55d3f8e86dfa26bee647efabd46b433"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "39b943c6ac6a2df839a517f999fb0f80"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "9f86222b569482c7099d63fc6777b4c3"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "2b4321e318c3a15201a03927d689a68b"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "f4e0556f8ba632bb2b3173bb66c154cd"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "41c35b3d26322e3867add3a673ca7662"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "af3581bac812d0385dceaa6f5a736fe2"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "491f5cbfd0dbf151a33c1e08f873d63d"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "779faec480645a722966774a47eae8d9"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "38c50f3532a97cfbf6f7d56eae574373"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "0c8ce94491f8c17a4420d9930481143a"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 1089994752,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
152064,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1089994752,
"byteOffset": 0
}
],
"md5sum": "66f04695d60e45846198a9fc8bbdbf63"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "4f52f9f92c78c26a27554ad4d4f7cd78"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "047ff44c3913aedc4e3cb60fabe26395"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "17bd040a9fdfcef0b70df8b94d663146"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "cbc350b9c1b7d99b77ab4daa8c18a8c1"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "86c3b4fa693028d945d612bd77df867d"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "a44dfa4da27107971118f3f6de914cc3"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "660e2cb41b048094839a76bc91bcf766"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "a62550607387cec8a5624e33e539dbdf"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "67c202106bcc4d9e9f973dc3620c3456"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "8209b9444df633ec858a00f347ce3d42"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "b7c7ba77d4134ba23cd2451435f11953"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "6214155dc3b560fdf2c5dcc09db8b6ee"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "14ab31cefeb686af50d149186d1690d0"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "df9ad719fc8f4cff74e3e3554c4987b9"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "ded2ff08c5f22307e2da9aded7168372"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "d1c0f39d1d42bbfc0db6945043489542"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "67226ee7497c589421e415aa48de26a3"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "fc17b54394d9fa73f1a74f5ba755b1cf"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "7c04436e3dd64eb3f6f3fe70affb4190"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "6d92b552719898265da0417fbec6df8a"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "40bfbb1540a5b3f35db930b4b23d287c"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "3bd8d122c50fa1ce7ef33a09b13d0592"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "4f5f6e2bb0dface0c5dcd9d48db8e8e6"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "6f92ff2695f1ac90ff1808ecab35829f"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "000c065bfbe386440822d19bff0e14ea"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "909931a63f829f055e7df6e3cedd2c54"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "429607b5aee6c69b2973d8cdc43c4c22"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "b4d2a3f4cb9d5d203f693de4b28126e6"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "0a6faa7f7301867ac8b322af1fe2f3ff"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "7f085532b0565152cebcf04912bdeacd"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "6c19ff9199412042eb2cb95051696137"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "3d5cbec0bfc7b38115f4511ad6337afc"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "71a76ce1c40b618856728e768b98e157"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "e61205386f34a9855c8224f68d9006ed"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "8caf6e2aa065afe2addb1f2ddd098060"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "def53067ed2e372eda7de0f7be940cd9"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "12b1137e3ee9ff8beeee2581d84a4d2e"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "859af0399c953cefb05963bba52d8885"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "1179eb9d4328ea92c77e0ec4ba367ade"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "2fc54b5877427a4432112eae73e1e42c"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "4d8c3dcdad81bc8bacf29e77e1f7e4e0"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "3652c850925d85415cdbf355a247e5d1"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "1619e5748e6f2329314461223af0a515"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "cb86ea0cb94d200b2b2007a5650b96b2"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "1e2bfce00efc6afdcad927f38ddaa905"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "ac51754adfec2a7cb1f3fe29f1c8eace"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "816c79232d4020fb50ba0a85b34c73b9"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "b422cb602e44d7db2589a10b70c2c61d"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "b2b06ef2c7bcc5d0c65ef0f73abda2f9"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "a9243b2cbb21c1c54a459199adec764c"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "da29e68517369e509ac028a0ba371270"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "b5958ca34b0b400ccb4200bfdf991e79"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "1c0f5266bcf6af0db604c1cd364eb310"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "c3d9f8c21204805685d6c91aea42c22b"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "d3dde135be29037c945852e84908ae94"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "89a561065f5157ee47f872fafe511a89"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "f9ff9c3ce6ea01791249939bdd98a0fd"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "6ae0b595c827ff6cfeea6579d9ef63e9"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "d8b7a781529e276eb2e96b621c00ef3d"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "075b4fc6646a810448abae253c0ba2dc"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "353dcd42f8e1834567ec32ef1abe2334"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "aba80261f6f8c8f29d35a759fcdcc920"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "b5d79efa61e631ecae15b1b2a0bcf2cf"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "b5089f8d94cad383e83586f59d996721"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "75b7d632b63557dc9d412c416fc4133e"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "6586b387fe997f07c1937331d9a56056"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 33546240,
"records": [
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7168
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14336
},
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 23552
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33053696
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33060864
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33068032
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33077248
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33084416
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33091584
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33100800
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33107968
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33115136
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33124352
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33131520
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33138688
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33147904
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33155072
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33162240
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33169408
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33178624
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33185792
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33192960
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33202176
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33209344
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33216512
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33225728
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33232896
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33240064
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33249280
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33256448
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33263616
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33272832
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33280000
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33287168
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33296384
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33303552
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33310720
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33319936
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33327104
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33334272
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33343488
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33350656
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33357824
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33367040
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33374208
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33381376
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33390592
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33397760
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33404928
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33414144
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33421312
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33428480
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33437696
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33444864
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33452032
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33461248
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33468416
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33475584
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33484800
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33491968
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33499136
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33508352
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33515520
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33522688
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33531904
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33539072
}
],
"md5sum": "7e8609760bc59a2f3f3a903d924502dc"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "caa8bb06f0c4fd855f56aa3c2a8891bb"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "daeb9b8f93dc5f029fe15eb9ceb8e993"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "d582991a932fcef8cf82d4ee1dca6fc4"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "e57a7af242c374c8510be32a1d85b978"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "ad89f31957af8b8142c1b47bae3c9cf0"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "3a22cf338fb88ab04321e928f05036f0"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "4a981414eeff2a1178eddc2f8cf97adc"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "477a037578e3f2d3807b10c5e65a35dd"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "af98de65de353f16817e9d771de2112d"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "294379ac3b240c3b2da1dba7cf9c44a8"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "ca00dc924b6b5efad975cffdd3528e84"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "aacd14c428654a9452e42be8f03b9bf6"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "84f7805a9cc56d3272262d2e597ae29f"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "f1312904cb0f92d5a58c38d126b5d535"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "cbba6822c254796a64b82d0d9a596de4"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "81f4a7561484152580a4d5297ca3061a"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "3fb9f81ca4ef3ddc0cd6fb4875a3550f"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "1b64348ddf012b9087e7fca6342c38b2"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "bdd283aeabb24f0f65355cd6bb3ad946"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "d4e7480a915072f2164044580f03d652"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "e1752905aa9b73ef460fa24e552a2a64"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "4f59ba38aa1db245d530e05c96aba53c"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "8e0455b9adcb33a4c63abdba928ad069"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "28aea622a34a22635555e13b991eea39"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 33180672,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 9216
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33039360
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33046528
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33053696
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33062912
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33070080
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33077248
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33086464
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33093632
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33100800
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33110016
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33117184
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33124352
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33133568
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33140736
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33147904
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33157120
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33164288
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33171456
}
],
"md5sum": "43e47e6155e9fed36be63db87913058b"
}
]
}