diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,5697 @@ +{ + "metadata": { + "ParamSize": 323, + "ParamBytes": 9081201152.0, + "BitsPerParam": 9.046979540559027 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1050673152, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 128256, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1050673152, + "byteOffset": 0 + } + ], + "md5sum": "a6945d49d5d076ecd0be132b67ce3cc2" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "8c79f9f063890532332f8da2589ed0cb" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 1050673152, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 128256, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1050673152, + "byteOffset": 0 + } + ], + "md5sum": "f2d28da53eec7a4bfb2c19fb349ee23d" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "443a8c35a9ebcd27107a962a6b5d2e60" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bb18c864a5537cf1c820595a3c8da431" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "859269ff14f85726d23bf3fad4778dc0" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "dfa0102c5071e96fc7b3fec573a12fc6" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d980765228cff4bca8d09d9e1bcebea5" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "49190d9e24712de781967667600e3cdd" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "acc3232a20ec7475890b94f02bd6eda8" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "dd7979ad65b31f1489b5b4c1749389ab" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "dd1f1a497ef1a285b39a91f580f12a69" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "37573df1ed1baa7fabe8238cf193281c" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "a25453505d8d92f0d91d92a7ee58a6b7" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "555c55e65f56d6a98c687a7414073293" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "52aed4740d128eb93fde928489476aff" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "efb95ab09e911aae0d10d411910ba789" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "820b8e068f99a25c0bf6961000d614b3" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "52690d569efe271f6e10af8b967b7d16" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2a014dce929e00158b40a35ed78eaa8b" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "50fe929ef85eb45869fcbdaa485a26f2" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "51edb418e653f51e37388c92254f1c55" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "94bc4dc51bde779b0b5837bda6ef1e48" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a5d569a77a1e1a8631ce269965419f4a" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "004b0e6aa9e2e6164d73ad4a86ebb7b7" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "394b11a4bc9cc4f5c82f100516702300" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d37ba16417c015e75f9c0ecb41f99916" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a400e47ce9f48128f395716498b56488" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "07c60265438aa529bb9d49f9003dc8aa" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "b05a33ba8679e50d121a19296ef73430" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "fe9fe6e39b3daf7e766a97b0accac44a" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6f937e0faca4d2967d4dcf30e5ace1d4" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4c5676e78a7af64cac3cffcb1b46be80" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d0c30e46622b7a9b2e4de5291130ebdd" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "0e431697fad371f03cf02ad332251ba2" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1aad061590af2b2ea4d96f84681a1ef1" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f4f03fe97da5b1c2911947db1141bcd7" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "e6d7420d843ab6310dc6fc22dd1e5efd" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a562f55f4556fb4bdd28bc9aa03d9f63" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d2aa280429d1818f82eeeefed114ec43" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d1a28ac62dda3bf7d3f3cd0e27d94a5e" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "31e163ad7a68f16e96df71ad962877d9" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "65958024e515f228248504cc4df04a19" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "16d9cf8211d69418089a2bfef3076680" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "3b6e03ee6aff26fa893805f1cd992afb" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "529650f6598d120ccc5b73fa83934488" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "17a455efe78cc00cdfec8c902227d577" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e20de2b22c9b1d0d49bce144b2d465c3" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "717099fe6f5453f366c7dd88bb852ea4" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "ccd6bba8648a350d9e02570cf4962d1c" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "fc750adb8ae849ac35ecc34d7dcd5816" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "61cb9e3222eb2a69d3fdc30874efdd27" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b67a7bb780f7237681a5c744d9d96c78" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "683707e940ecda57bb68dc6842765d13" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "346c3396cea046ae89931bf78a3e1253" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "379c00addbb94e2a32c180280580e84c" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0c5e6d5762b6b00988d11091d75e0885" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "bf5a15162ca2ebfd500f07450e29118b" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "eee0e77954364fb92068d470482d9558" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "7e28e8c0fbd38a722ec1ea8d52f6d670" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e8cb44eeb6e8d9673043649f7bb6de92" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "afee385d7a56db4ba16e2d4338ee4c42" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "92140513fc226e5b04ef8b1729afb6e7" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "73c41a3ba5b79c9c3d4bffaffc8f29a3" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "16f7cdf21b686c7e2e7bdd7865b63f06" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "796f8694234c10cb7f70efece86d12f3" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "131e383fef78dff08e81670c807cade5" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "5060b3a75c831a2400dd0964b0581b9c" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ee0b69aed4827b160aa43e7c816d7b89" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d8dd0f8ca7473f5ec14b8becfe8b84f6" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a383af93328fdfb651afc7ca478185da" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "26c5a94d72153d59e2882bdf743de499" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2612845126d3c9aedc64a130c41f4ef2" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "72c1020f23578326238a03735b7966ff" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "be43f2dcf01fe92c16c6fdde4578a332" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0165db642c3924a116f6a1401a5ac637" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9f281f74f05ffea2375765db33d8696b" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "2f6f92f71f61ccc478e623d9a8e9b3ff" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e43d80a678bf4e56896ead72f276bf32" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "392c740d765ea4c96b9493feb37fba74" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d64c984973fdfd7d06e351c3cb461635" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6b90407ec2dd6100382bed7f11b003a2" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f548ee06ba954fc568ce8b75ec87ed19" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2b5af8e2a16535b06a26ea71acaea36d" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "ce4443881a0e04570a2cdbd2b6faa412" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "08336bcc0492b1b98db7db03ee54e849" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "df80a476928605e6acb9440e4dba49be" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "324ffd6ba8ef1197b271ae257d94dbad" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "20fa4e12dc4ba27bc856308cd57e66d7" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "5ce4cb53f657711729dcc54ab572ca6a" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "9d9010e8f2eaff4e98a2c5043377c6ea" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3fc4111ead3ce25a870e14369893dd41" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1b7d2288421481d15a3855013539ea02" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "ca4f844016c03231ee4d3679f6ce853a" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cffe2e7008403dc9b39358572a05e049" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "fee8e0f2649a6a6cb1ecf90b5ea0274f" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "24a3d52bf0acf242430e94565ad89790" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "305eb9b0055af82cecb72a2e029368cc" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c9b4061fbf46d32fa71e56b5934da53b" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "5d00551ddb2e5c5cf81e5b1a8c45f260" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0deed8dcc8050abf4a6fe21b3a3f215e" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "a354b3317c7b283c62f0a25ccd24366b" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "fd90ff929543b2a40f379a7b6a195c79" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b98eafecb641d082b37e528be46634a5" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "14889a87af60d074f8364af28e2e7d8e" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "0b8e5af5a33f2ea3584418f33a50c3ae" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "baa6fbd062d89c51d089910495901dfb" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "328decc74d93641fa9c16bea7e7b095a" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a769694c6b15113a19d0983f1e05d88e" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "fd837876a11c1392d04ba56ce7538d9f" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "734d71cf51c0458231752747b7a1f17d" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "65d2eed328271f325175167fe878cdc7" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "835f10c5a785548747edf40c329644d6" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "9e701dd471c50a60cbeaf9751ca3c184" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b79490047ce9371df8422d645af64968" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "91d8f96054db7a29034da491f2ad6eb9" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "703daf772565209c77c3d6090d146782" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "19628496ce6e0431aa505475391b4f33" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "ee77166a7802d504f6d7cac33c35503a" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2f47f1c1566a9248e344fca90b7e2382" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "37f4ca216ef82f37946cc70aea85ac71" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "4b4abc11c84f4485441812223b4155c3" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "464c942cfa622474a66436bd48b56e0e" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d6c08465157a49cbdd9845bd5db1bce6" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2b3d61740600e64883b6bc37e17072bf" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "8f4149db52ba940dcfa9f66a8dff8ddb" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e45a5a68dca017a35bb4b54e61fd367a" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "87d68d6aca79f1e389967ae28d4865f3" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d0a167a49b8d2e0ab3febca1d8a818ac" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 25698560, + "records": [ + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 0 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 8192 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8194 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16386 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24578 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 32770 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 32772 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32774 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "e4m3_float8", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 40966 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25206790 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25206792 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25206794 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25214986 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25214988 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25214990 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25223182 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25223184 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25223186 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25231378 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25231380 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25231382 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25239574 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25239576 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25239578 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25247770 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25247772 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25247774 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25255966 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25255968 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25255970 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25264162 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25264164 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25264166 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25272358 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25272360 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25272362 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25280554 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25280556 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25280558 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25288750 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25288752 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25288754 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25296946 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25296948 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25296950 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25305142 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25305144 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25305146 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25313338 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25313340 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25313342 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25321534 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25321536 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25321538 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25329730 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25329732 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25329734 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25337926 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25337928 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25337930 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25346122 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25346124 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25346126 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25354318 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25354320 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25354322 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25362514 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25362516 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25362518 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25370710 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25370712 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25370714 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25378906 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25378908 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25378910 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25387102 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25387104 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25387106 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25395298 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25395300 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25395302 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25403494 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25403496 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25403498 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25411690 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25411692 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25411694 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25419886 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25419888 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25419890 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25428082 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25428084 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25428086 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25436278 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25436280 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25436282 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25444474 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25444476 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25444478 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25452670 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25452672 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25452674 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25460866 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25460868 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25460870 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25469062 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25469064 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25469066 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25477258 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25477260 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25477262 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25485454 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25485456 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25485458 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25493650 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25493652 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25493654 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25501846 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25501848 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25501850 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25501852 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25501854 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25501856 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25510048 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25510050 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25510052 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25518244 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25518246 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25518248 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25526440 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25526442 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25534634 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25542826 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25542828 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25542830 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25551022 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25551024 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25551026 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25559218 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25559220 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25559222 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25567414 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25567416 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25567418 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25575610 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25575612 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25575614 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25583806 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25583808 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25583810 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25592002 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25592004 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25592006 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25600198 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25600200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25600202 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25608394 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25608396 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25608398 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25616590 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25616592 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25616594 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25624786 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25624788 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25624790 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25632982 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25632984 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25632986 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25641178 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25641180 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25641182 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25649374 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25649376 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25649378 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25657570 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25657572 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25657574 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25665766 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25665768 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25665770 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25673962 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25673964 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25673966 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25682158 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25682160 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25682162 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25690354 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25690356 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25690358 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25698550 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25698552 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25698554 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25698556 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 25698558 + } + ], + "md5sum": "a565ee54f0509be91e2ae031ef6743be" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 256, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 2 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 4 + }, + { + "name": "model.layers.0.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 6 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 8 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 10 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 12 + }, + { + "name": "model.layers.1.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 14 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 16 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 18 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 20 + }, + { + "name": "model.layers.2.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 22 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 24 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 26 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 28 + }, + { + "name": "model.layers.3.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 30 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 32 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 34 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 36 + }, + { + "name": "model.layers.4.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 38 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 40 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 42 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 44 + }, + { + "name": "model.layers.5.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 46 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 48 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 50 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 52 + }, + { + "name": "model.layers.6.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 54 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 56 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 58 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 60 + }, + { + "name": "model.layers.7.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 62 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 64 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 66 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 68 + }, + { + "name": "model.layers.8.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 70 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 72 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 74 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 76 + }, + { + "name": "model.layers.9.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 78 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 80 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 82 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 84 + }, + { + "name": "model.layers.10.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 86 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 88 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 90 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 92 + }, + { + "name": "model.layers.11.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 94 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 96 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 98 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 100 + }, + { + "name": "model.layers.12.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 102 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 104 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 106 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 108 + }, + { + "name": "model.layers.13.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 110 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 112 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 114 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 116 + }, + { + "name": "model.layers.14.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 118 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 120 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 122 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 124 + }, + { + "name": "model.layers.15.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 126 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 128 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 130 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 132 + }, + { + "name": "model.layers.16.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 134 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 136 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 138 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 140 + }, + { + "name": "model.layers.17.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 142 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 144 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 146 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 148 + }, + { + "name": "model.layers.18.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 150 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 152 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 154 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 156 + }, + { + "name": "model.layers.19.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 158 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 160 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 162 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 164 + }, + { + "name": "model.layers.20.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 166 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 168 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 170 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 172 + }, + { + "name": "model.layers.21.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 174 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 176 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 178 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 180 + }, + { + "name": "model.layers.22.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 182 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 184 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 186 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 188 + }, + { + "name": "model.layers.23.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 190 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 192 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 194 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 196 + }, + { + "name": "model.layers.24.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 198 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 200 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 202 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 204 + }, + { + "name": "model.layers.25.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 206 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 208 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 210 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 212 + }, + { + "name": "model.layers.26.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 214 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 216 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 218 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 220 + }, + { + "name": "model.layers.27.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 222 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 224 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 226 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 228 + }, + { + "name": "model.layers.28.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 230 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 232 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 234 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 236 + }, + { + "name": "model.layers.29.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 238 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 240 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 242 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 244 + }, + { + "name": "model.layers.30.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 246 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 248 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 250 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 252 + }, + { + "name": "model.layers.31.mlp.down_proj.q_calibration_scale", + "shape": [ + 1 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2, + "byteOffset": 254 + } + ], + "md5sum": "3dd53907af219ff4adc62aa62c8700cf" + } + ] +} \ No newline at end of file