Llama-3.1-8B-Instruct-fp8-MLC / ndarray-cache-b16.json
ruihanglai's picture
initial commit
5177197
raw
history blame
No virus
193 kB
{
"metadata": {
"ParamSize": 323,
"ParamBytes": 9081201152.0,
"BitsPerParam": 9.046979540559027
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1050673152,
"records": [
{
"name": "lm_head.weight",
"shape": [
128256,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1050673152,
"byteOffset": 0
}
],
"md5sum": "a6945d49d5d076ecd0be132b67ce3cc2"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8c79f9f063890532332f8da2589ed0cb"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 1050673152,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
128256,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1050673152,
"byteOffset": 0
}
],
"md5sum": "f2d28da53eec7a4bfb2c19fb349ee23d"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "443a8c35a9ebcd27107a962a6b5d2e60"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "bb18c864a5537cf1c820595a3c8da431"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "859269ff14f85726d23bf3fad4778dc0"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "dfa0102c5071e96fc7b3fec573a12fc6"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d980765228cff4bca8d09d9e1bcebea5"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "49190d9e24712de781967667600e3cdd"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "acc3232a20ec7475890b94f02bd6eda8"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "dd7979ad65b31f1489b5b4c1749389ab"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "dd1f1a497ef1a285b39a91f580f12a69"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "37573df1ed1baa7fabe8238cf193281c"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "a25453505d8d92f0d91d92a7ee58a6b7"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "555c55e65f56d6a98c687a7414073293"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "52aed4740d128eb93fde928489476aff"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "efb95ab09e911aae0d10d411910ba789"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "820b8e068f99a25c0bf6961000d614b3"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "52690d569efe271f6e10af8b967b7d16"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2a014dce929e00158b40a35ed78eaa8b"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "50fe929ef85eb45869fcbdaa485a26f2"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "51edb418e653f51e37388c92254f1c55"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "94bc4dc51bde779b0b5837bda6ef1e48"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "a5d569a77a1e1a8631ce269965419f4a"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "004b0e6aa9e2e6164d73ad4a86ebb7b7"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "394b11a4bc9cc4f5c82f100516702300"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d37ba16417c015e75f9c0ecb41f99916"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "a400e47ce9f48128f395716498b56488"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "07c60265438aa529bb9d49f9003dc8aa"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "b05a33ba8679e50d121a19296ef73430"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fe9fe6e39b3daf7e766a97b0accac44a"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "6f937e0faca4d2967d4dcf30e5ace1d4"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4c5676e78a7af64cac3cffcb1b46be80"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "d0c30e46622b7a9b2e4de5291130ebdd"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0e431697fad371f03cf02ad332251ba2"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "1aad061590af2b2ea4d96f84681a1ef1"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f4f03fe97da5b1c2911947db1141bcd7"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "e6d7420d843ab6310dc6fc22dd1e5efd"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a562f55f4556fb4bdd28bc9aa03d9f63"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d2aa280429d1818f82eeeefed114ec43"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d1a28ac62dda3bf7d3f3cd0e27d94a5e"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "31e163ad7a68f16e96df71ad962877d9"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "65958024e515f228248504cc4df04a19"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "16d9cf8211d69418089a2bfef3076680"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3b6e03ee6aff26fa893805f1cd992afb"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "529650f6598d120ccc5b73fa83934488"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "17a455efe78cc00cdfec8c902227d577"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "e20de2b22c9b1d0d49bce144b2d465c3"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "717099fe6f5453f366c7dd88bb852ea4"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "ccd6bba8648a350d9e02570cf4962d1c"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fc750adb8ae849ac35ecc34d7dcd5816"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "61cb9e3222eb2a69d3fdc30874efdd27"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b67a7bb780f7237681a5c744d9d96c78"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "683707e940ecda57bb68dc6842765d13"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "346c3396cea046ae89931bf78a3e1253"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "379c00addbb94e2a32c180280580e84c"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0c5e6d5762b6b00988d11091d75e0885"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "bf5a15162ca2ebfd500f07450e29118b"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "eee0e77954364fb92068d470482d9558"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "7e28e8c0fbd38a722ec1ea8d52f6d670"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e8cb44eeb6e8d9673043649f7bb6de92"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "afee385d7a56db4ba16e2d4338ee4c42"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "92140513fc226e5b04ef8b1729afb6e7"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "73c41a3ba5b79c9c3d4bffaffc8f29a3"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "16f7cdf21b686c7e2e7bdd7865b63f06"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "796f8694234c10cb7f70efece86d12f3"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "131e383fef78dff08e81670c807cade5"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "5060b3a75c831a2400dd0964b0581b9c"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ee0b69aed4827b160aa43e7c816d7b89"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "d8dd0f8ca7473f5ec14b8becfe8b84f6"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a383af93328fdfb651afc7ca478185da"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "26c5a94d72153d59e2882bdf743de499"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2612845126d3c9aedc64a130c41f4ef2"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "72c1020f23578326238a03735b7966ff"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "be43f2dcf01fe92c16c6fdde4578a332"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "0165db642c3924a116f6a1401a5ac637"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9f281f74f05ffea2375765db33d8696b"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "2f6f92f71f61ccc478e623d9a8e9b3ff"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "e43d80a678bf4e56896ead72f276bf32"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "392c740d765ea4c96b9493feb37fba74"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "d64c984973fdfd7d06e351c3cb461635"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6b90407ec2dd6100382bed7f11b003a2"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "f548ee06ba954fc568ce8b75ec87ed19"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2b5af8e2a16535b06a26ea71acaea36d"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "ce4443881a0e04570a2cdbd2b6faa412"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "08336bcc0492b1b98db7db03ee54e849"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "df80a476928605e6acb9440e4dba49be"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "324ffd6ba8ef1197b271ae257d94dbad"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "20fa4e12dc4ba27bc856308cd57e66d7"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "5ce4cb53f657711729dcc54ab572ca6a"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9d9010e8f2eaff4e98a2c5043377c6ea"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "3fc4111ead3ce25a870e14369893dd41"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1b7d2288421481d15a3855013539ea02"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "ca4f844016c03231ee4d3679f6ce853a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "cffe2e7008403dc9b39358572a05e049"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "fee8e0f2649a6a6cb1ecf90b5ea0274f"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "24a3d52bf0acf242430e94565ad89790"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "305eb9b0055af82cecb72a2e029368cc"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c9b4061fbf46d32fa71e56b5934da53b"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "5d00551ddb2e5c5cf81e5b1a8c45f260"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0deed8dcc8050abf4a6fe21b3a3f215e"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "a354b3317c7b283c62f0a25ccd24366b"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fd90ff929543b2a40f379a7b6a195c79"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "b98eafecb641d082b37e528be46634a5"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "14889a87af60d074f8364af28e2e7d8e"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "0b8e5af5a33f2ea3584418f33a50c3ae"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "baa6fbd062d89c51d089910495901dfb"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "328decc74d93641fa9c16bea7e7b095a"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a769694c6b15113a19d0983f1e05d88e"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "fd837876a11c1392d04ba56ce7538d9f"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "734d71cf51c0458231752747b7a1f17d"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "65d2eed328271f325175167fe878cdc7"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "835f10c5a785548747edf40c329644d6"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "9e701dd471c50a60cbeaf9751ca3c184"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b79490047ce9371df8422d645af64968"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "91d8f96054db7a29034da491f2ad6eb9"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "703daf772565209c77c3d6090d146782"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "19628496ce6e0431aa505475391b4f33"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ee77166a7802d504f6d7cac33c35503a"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2f47f1c1566a9248e344fca90b7e2382"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "37f4ca216ef82f37946cc70aea85ac71"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "4b4abc11c84f4485441812223b4155c3"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "464c942cfa622474a66436bd48b56e0e"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d6c08465157a49cbdd9845bd5db1bce6"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2b3d61740600e64883b6bc37e17072bf"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "8f4149db52ba940dcfa9f66a8dff8ddb"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "e45a5a68dca017a35bb4b54e61fd367a"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "87d68d6aca79f1e389967ae28d4865f3"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "d0a167a49b8d2e0ab3febca1d8a818ac"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 25698560,
"records": [
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 0
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 8192
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8194
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16386
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24578
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 32770
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 32772
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32774
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 40966
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25206790
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25206792
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25206794
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25214986
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25214988
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25214990
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25223182
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25223184
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25223186
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25231378
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25231380
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25231382
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25239574
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25239576
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25239578
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25247770
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25247772
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25247774
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25255966
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25255968
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25255970
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25264162
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25264164
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25264166
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25272358
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25272360
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25272362
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25280554
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25280556
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25280558
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25288750
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25288752
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25288754
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25296946
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25296948
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25296950
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25305142
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25305144
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25305146
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25313338
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25313340
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25313342
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25321534
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25321536
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25321538
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25329730
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25329732
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25329734
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25337926
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25337928
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25337930
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25346122
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25346124
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25346126
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25354318
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25354320
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25354322
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25362514
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25362516
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25362518
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25370710
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25370712
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25370714
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25378906
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25378908
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25378910
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25387102
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25387104
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25387106
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25395298
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25395300
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25395302
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25403494
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25403496
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25403498
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25411690
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25411692
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25411694
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25419886
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25419888
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25419890
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25428082
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25428084
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25428086
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25436278
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25436280
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25436282
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25444474
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25444476
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25444478
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25452670
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25452672
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25452674
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25460866
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25460868
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25460870
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25469062
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25469064
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25469066
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25477258
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25477260
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25477262
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25485454
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25485456
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25485458
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25493650
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25493652
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25493654
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25501846
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25501848
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25501850
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25501852
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25501854
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25501856
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25510048
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25510050
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25510052
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25518244
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25518246
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25518248
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25526440
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25526442
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25534634
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25542826
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25542828
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25542830
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25551022
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25551024
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25551026
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25559218
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25559220
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25559222
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25567414
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25567416
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25567418
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25575610
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25575612
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25575614
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25583806
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25583808
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25583810
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25592002
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25592004
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25592006
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25600198
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25600200
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25600202
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25608394
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25608396
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25608398
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25616590
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25616592
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25616594
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25624786
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25624788
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25624790
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25632982
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25632984
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25632986
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25641178
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25641180
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25641182
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25649374
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25649376
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25649378
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25657570
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25657572
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25657574
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25665766
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25665768
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25665770
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25673962
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25673964
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25673966
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25682158
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25682160
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25682162
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25690354
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25690356
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25690358
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25698550
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25698552
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25698554
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25698556
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 25698558
}
],
"md5sum": "a565ee54f0509be91e2ae031ef6743be"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 256,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 2
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 4
},
{
"name": "model.layers.0.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 6
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 8
},
{
"name": "model.layers.1.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 10
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 12
},
{
"name": "model.layers.1.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 14
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 16
},
{
"name": "model.layers.2.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 18
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 20
},
{
"name": "model.layers.2.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 22
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 24
},
{
"name": "model.layers.3.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 26
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 28
},
{
"name": "model.layers.3.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 30
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 32
},
{
"name": "model.layers.4.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 34
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 36
},
{
"name": "model.layers.4.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 38
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 40
},
{
"name": "model.layers.5.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 42
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 44
},
{
"name": "model.layers.5.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 46
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 48
},
{
"name": "model.layers.6.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 50
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 52
},
{
"name": "model.layers.6.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 54
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 56
},
{
"name": "model.layers.7.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 58
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 60
},
{
"name": "model.layers.7.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 62
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 64
},
{
"name": "model.layers.8.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 66
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 68
},
{
"name": "model.layers.8.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 70
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 72
},
{
"name": "model.layers.9.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 74
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 76
},
{
"name": "model.layers.9.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 78
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 80
},
{
"name": "model.layers.10.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 82
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 84
},
{
"name": "model.layers.10.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 86
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 88
},
{
"name": "model.layers.11.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 90
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 92
},
{
"name": "model.layers.11.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 94
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 96
},
{
"name": "model.layers.12.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 98
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 100
},
{
"name": "model.layers.12.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 102
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 104
},
{
"name": "model.layers.13.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 106
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 108
},
{
"name": "model.layers.13.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 110
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 112
},
{
"name": "model.layers.14.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 114
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 116
},
{
"name": "model.layers.14.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 118
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 120
},
{
"name": "model.layers.15.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 122
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 124
},
{
"name": "model.layers.15.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 126
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 128
},
{
"name": "model.layers.16.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 130
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 132
},
{
"name": "model.layers.16.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 134
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 136
},
{
"name": "model.layers.17.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 138
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 140
},
{
"name": "model.layers.17.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 142
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 144
},
{
"name": "model.layers.18.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 146
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 148
},
{
"name": "model.layers.18.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 150
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 152
},
{
"name": "model.layers.19.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 154
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 156
},
{
"name": "model.layers.19.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 158
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 160
},
{
"name": "model.layers.20.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 162
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 164
},
{
"name": "model.layers.20.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 166
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 168
},
{
"name": "model.layers.21.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 170
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 172
},
{
"name": "model.layers.21.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 174
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 176
},
{
"name": "model.layers.22.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 178
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 180
},
{
"name": "model.layers.22.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 182
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 184
},
{
"name": "model.layers.23.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 186
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 188
},
{
"name": "model.layers.23.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 190
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 192
},
{
"name": "model.layers.24.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 194
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 196
},
{
"name": "model.layers.24.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 198
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 200
},
{
"name": "model.layers.25.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 202
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 204
},
{
"name": "model.layers.25.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 206
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 208
},
{
"name": "model.layers.26.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 210
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 212
},
{
"name": "model.layers.26.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 214
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 216
},
{
"name": "model.layers.27.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 218
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 220
},
{
"name": "model.layers.27.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 222
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 224
},
{
"name": "model.layers.28.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 226
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 228
},
{
"name": "model.layers.28.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 230
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 232
},
{
"name": "model.layers.29.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 234
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 236
},
{
"name": "model.layers.29.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 238
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 240
},
{
"name": "model.layers.30.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 242
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 244
},
{
"name": "model.layers.30.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 246
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 248
},
{
"name": "model.layers.31.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 250
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 252
},
{
"name": "model.layers.31.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2,
"byteOffset": 254
}
],
"md5sum": "3dd53907af219ff4adc62aa62c8700cf"
}
]
}