dusty-nv's picture
Upload folder using huggingface_hub
b4de9fa verified
{
"metadata": {
"ParamSize": 313,
"ParamBytes": 3879214080.0,
"BitsPerParam": 4.07500989461587
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 272498688,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 272498688,
"byteOffset": 0
}
],
"md5sum": "e31688f707f2f116124d87be9c5b2f79"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 34062336,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 34062336,
"byteOffset": 0
}
],
"md5sum": "f15e1c34f09c298ff581a537ff66ecab"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "65db05fabb9eca3bc5cea7916f74c85a"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "ede83f5af023d4b21c96c42de4e21975"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "af2eabf0c47eee43ed1c30c6552bbaaf"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "e166556c756223f3bd49df9f4e0fd3ff"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 29615104,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 9216
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8266752
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8275968
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14698496
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14705664
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14781440
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14788608
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14795776
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14802944
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14812160
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23069696
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23078912
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29501440
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29508608
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29584384
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29591552
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29598720
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29605888
}
],
"md5sum": "092513153d87308185451fb37fab4dda"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "de19625518d6e29701db2a6452295ea8"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "dec90d137736beb8dc5c172ad3338432"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "bfe3c542bdd5d61dd007069afed49094"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "b608783c27572c2273f4609d58bd68c5"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "32a02cefac996ddf90675f4478e806c2"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "716abb5b6d7b23e2b863fd7d0c95fc47"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "04d0ef721626790769840bf87fef7e89"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "ff6d4f35f2a45eed453582d4a9c4917c"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "2e6289f55a037e98a471a5fa621e55bf"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "6f3abb2b123faa9bf04caaccd0416f57"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "6cf72872b717e6c757b9ff56c018b6d8"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "3c5fe62a625511dfa341859d08be9a67"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "0f3179e252d3f311550876eb8c706fd7"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "1ce677fa5f7c83f487e81880c4580ccd"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "30a5369c0ceef0bfb8e6a496c4dc721c"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "b69dbdc38c1df85267c3a2f2b65436de"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "2563ccb5d422ab813c9de962e43f39c4"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "a07e849b915870bee2b6b39fb44c32dd"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "92ab1ac879c451e1f076a1a854436b0c"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "e8358caccd37a829ce4fdd1deeaedefd"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "aa76473a203925b55099d9e372c73fb2"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "d11f78694076073fc014816b2f939c6f"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "30a9be6bdb771cd1b187e4954741f2eb"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "3f60eee1e540d1f6191047224da0e6f4"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "7a00911114978ec23728f32f5e89aec8"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "1a73eb9f7853af728afebbc661e190dd"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "93b530cf5a1091e9eef9bccb358d91a3"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "9483359407807277a0eaf4b31cee0bbc"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "9005490447085c048f658381b8e1e57f"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "f82e09d780ea6b775db9b61f1061798e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "64634f86183b5bd65b6eaba9716d6574"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "93d04b6431aa8d9b30dfa9164600af14"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "68dfb660df2e61439d1c3beedf1d7893"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "f0852affa421d456c9d3c58a5a096f6f"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "068663cd94c9f59bda7a7c8fce55398d"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "798610b35a736af6b8894f0492d490c1"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "75e811b87b02a9a91cee92e8bbfde0f5"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "6e78f92056c9a85dbf00d174984f1929"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "aba695a6cf161a081b97142e603e0aa4"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "0b5ed982bdd3da65a59d1f2770e043c9"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "7a7cc969c7df278b04101f0006a57161"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "137f7fcef38d4b6124a433b5ccbcd3ce"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "6c791ae014115ed7c82c754d3e1c6f96"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "85774908f7a0f47dcbe30ed842acd12d"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "430056cffd91c87a73f953b3ee219089"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "73d0c51189946585febeb874e69ae8f9"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "16e3bf38802257a1afea51a3dcc81a70"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "4e101099f3cd9013ce1efe57f648b943"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "83df3f9f61d977d03260053dfa8158b4"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "daeaeb7c3100d6124106196e8a04db77"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "840435f3a78f7994d104c1eb1f7d8010"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "fb48c038ef825b044ac2ab0ad2c0a673"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "c52a17b91b3ab39e31531744e93ed53f"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "090c7b7b3358e4e112c4101d05641c95"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "420645d630c18cd06e523cb3bef9865b"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "42d512f8568e6f41e56bf74e9b5bb620"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "d39f6ff6e8aa864a22052f2a6981deee"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "d5958f63fc11c2aed977e16fa2a64b9b"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "6c090b3947e1f882e9cfb8429420dc9c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 29605888,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 29596672
}
],
"md5sum": "6ee7234308b97a59e36dc43efb577dad"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "e0a40ff53b83009d749803058b4cda78"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "87aa58847d9a6d4b48124bfffc9da0b5"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
3584,
18944
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "6633a114e92a018ba7eb2e0e5ca821ad"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
18944,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "7c394d2eebdc11d6d5322f23fdf04131"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 272498688,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 272498688,
"byteOffset": 0
}
],
"md5sum": "99b263b1d276d84a055d7219935c6fbf"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 34062336,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 34062336,
"byteOffset": 0
}
],
"md5sum": "5c2258bb0a45ba1f907fff20c5703df9"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 29603840,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 8257536
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8266752
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14689280
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 14696448
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14772224
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14779392
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 14786560
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14793728
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
3584,
2304
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 14802944
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
1,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 23060480
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3584,
1792
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23069696
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29492224
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
1,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 75776,
"byteOffset": 29499392
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
1,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29575168
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29582336
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29589504
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29596672
}
],
"md5sum": "ff32f12c81ec50a8a34b853aeac5ae8c"
}
]
}