{ "metadata": { "ParamSize": 323, "ParamBytes": 9081201152.0, "BitsPerParam": 9.046979540559027 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1050673152, "records": [ { "name": "lm_head.weight", "shape": [ 128256, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1050673152, "byteOffset": 0 } ], "md5sum": "a6945d49d5d076ecd0be132b67ce3cc2" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "8c79f9f063890532332f8da2589ed0cb" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 1050673152, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1050673152, "byteOffset": 0 } ], "md5sum": "f2d28da53eec7a4bfb2c19fb349ee23d" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "443a8c35a9ebcd27107a962a6b5d2e60" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bb18c864a5537cf1c820595a3c8da431" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "859269ff14f85726d23bf3fad4778dc0" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "dfa0102c5071e96fc7b3fec573a12fc6" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d980765228cff4bca8d09d9e1bcebea5" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "49190d9e24712de781967667600e3cdd" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "acc3232a20ec7475890b94f02bd6eda8" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "dd7979ad65b31f1489b5b4c1749389ab" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dd1f1a497ef1a285b39a91f580f12a69" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "37573df1ed1baa7fabe8238cf193281c" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "a25453505d8d92f0d91d92a7ee58a6b7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "555c55e65f56d6a98c687a7414073293" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "52aed4740d128eb93fde928489476aff" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "efb95ab09e911aae0d10d411910ba789" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "820b8e068f99a25c0bf6961000d614b3" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "52690d569efe271f6e10af8b967b7d16" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2a014dce929e00158b40a35ed78eaa8b" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "50fe929ef85eb45869fcbdaa485a26f2" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "51edb418e653f51e37388c92254f1c55" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "94bc4dc51bde779b0b5837bda6ef1e48" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a5d569a77a1e1a8631ce269965419f4a" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "004b0e6aa9e2e6164d73ad4a86ebb7b7" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "394b11a4bc9cc4f5c82f100516702300" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d37ba16417c015e75f9c0ecb41f99916" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a400e47ce9f48128f395716498b56488" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "07c60265438aa529bb9d49f9003dc8aa" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "b05a33ba8679e50d121a19296ef73430" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fe9fe6e39b3daf7e766a97b0accac44a" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6f937e0faca4d2967d4dcf30e5ace1d4" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4c5676e78a7af64cac3cffcb1b46be80" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d0c30e46622b7a9b2e4de5291130ebdd" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0e431697fad371f03cf02ad332251ba2" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1aad061590af2b2ea4d96f84681a1ef1" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f4f03fe97da5b1c2911947db1141bcd7" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "e6d7420d843ab6310dc6fc22dd1e5efd" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a562f55f4556fb4bdd28bc9aa03d9f63" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d2aa280429d1818f82eeeefed114ec43" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d1a28ac62dda3bf7d3f3cd0e27d94a5e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "31e163ad7a68f16e96df71ad962877d9" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "65958024e515f228248504cc4df04a19" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "16d9cf8211d69418089a2bfef3076680" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3b6e03ee6aff26fa893805f1cd992afb" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "529650f6598d120ccc5b73fa83934488" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "17a455efe78cc00cdfec8c902227d577" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e20de2b22c9b1d0d49bce144b2d465c3" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "717099fe6f5453f366c7dd88bb852ea4" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "ccd6bba8648a350d9e02570cf4962d1c" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fc750adb8ae849ac35ecc34d7dcd5816" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "61cb9e3222eb2a69d3fdc30874efdd27" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b67a7bb780f7237681a5c744d9d96c78" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "683707e940ecda57bb68dc6842765d13" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "346c3396cea046ae89931bf78a3e1253" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "379c00addbb94e2a32c180280580e84c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0c5e6d5762b6b00988d11091d75e0885" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "bf5a15162ca2ebfd500f07450e29118b" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "eee0e77954364fb92068d470482d9558" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7e28e8c0fbd38a722ec1ea8d52f6d670" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e8cb44eeb6e8d9673043649f7bb6de92" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "afee385d7a56db4ba16e2d4338ee4c42" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "92140513fc226e5b04ef8b1729afb6e7" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "73c41a3ba5b79c9c3d4bffaffc8f29a3" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "16f7cdf21b686c7e2e7bdd7865b63f06" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "796f8694234c10cb7f70efece86d12f3" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "131e383fef78dff08e81670c807cade5" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5060b3a75c831a2400dd0964b0581b9c" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ee0b69aed4827b160aa43e7c816d7b89" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d8dd0f8ca7473f5ec14b8becfe8b84f6" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a383af93328fdfb651afc7ca478185da" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "26c5a94d72153d59e2882bdf743de499" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2612845126d3c9aedc64a130c41f4ef2" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "72c1020f23578326238a03735b7966ff" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "be43f2dcf01fe92c16c6fdde4578a332" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0165db642c3924a116f6a1401a5ac637" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9f281f74f05ffea2375765db33d8696b" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "2f6f92f71f61ccc478e623d9a8e9b3ff" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e43d80a678bf4e56896ead72f276bf32" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "392c740d765ea4c96b9493feb37fba74" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d64c984973fdfd7d06e351c3cb461635" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6b90407ec2dd6100382bed7f11b003a2" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f548ee06ba954fc568ce8b75ec87ed19" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2b5af8e2a16535b06a26ea71acaea36d" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "ce4443881a0e04570a2cdbd2b6faa412" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "08336bcc0492b1b98db7db03ee54e849" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "df80a476928605e6acb9440e4dba49be" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "324ffd6ba8ef1197b271ae257d94dbad" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "20fa4e12dc4ba27bc856308cd57e66d7" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "5ce4cb53f657711729dcc54ab572ca6a" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9d9010e8f2eaff4e98a2c5043377c6ea" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3fc4111ead3ce25a870e14369893dd41" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1b7d2288421481d15a3855013539ea02" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "ca4f844016c03231ee4d3679f6ce853a" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cffe2e7008403dc9b39358572a05e049" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fee8e0f2649a6a6cb1ecf90b5ea0274f" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "24a3d52bf0acf242430e94565ad89790" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "305eb9b0055af82cecb72a2e029368cc" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c9b4061fbf46d32fa71e56b5934da53b" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5d00551ddb2e5c5cf81e5b1a8c45f260" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0deed8dcc8050abf4a6fe21b3a3f215e" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "a354b3317c7b283c62f0a25ccd24366b" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fd90ff929543b2a40f379a7b6a195c79" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b98eafecb641d082b37e528be46634a5" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "14889a87af60d074f8364af28e2e7d8e" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "0b8e5af5a33f2ea3584418f33a50c3ae" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "baa6fbd062d89c51d089910495901dfb" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "328decc74d93641fa9c16bea7e7b095a" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a769694c6b15113a19d0983f1e05d88e" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "fd837876a11c1392d04ba56ce7538d9f" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "734d71cf51c0458231752747b7a1f17d" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "65d2eed328271f325175167fe878cdc7" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "835f10c5a785548747edf40c329644d6" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "9e701dd471c50a60cbeaf9751ca3c184" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b79490047ce9371df8422d645af64968" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "91d8f96054db7a29034da491f2ad6eb9" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "703daf772565209c77c3d6090d146782" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "19628496ce6e0431aa505475391b4f33" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ee77166a7802d504f6d7cac33c35503a" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2f47f1c1566a9248e344fca90b7e2382" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "37f4ca216ef82f37946cc70aea85ac71" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "4b4abc11c84f4485441812223b4155c3" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "464c942cfa622474a66436bd48b56e0e" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d6c08465157a49cbdd9845bd5db1bce6" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2b3d61740600e64883b6bc37e17072bf" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "8f4149db52ba940dcfa9f66a8dff8ddb" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e45a5a68dca017a35bb4b54e61fd367a" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "87d68d6aca79f1e389967ae28d4865f3" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d0a167a49b8d2e0ab3febca1d8a818ac" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 25698560, "records": [ { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 8192 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8194 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16386 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24578 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 32770 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 32772 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32774 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 4096 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 40966 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25206790 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25206792 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25206794 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25214986 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25214988 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25214990 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25223182 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25223184 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25223186 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25231378 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25231380 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25231382 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25239574 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25239576 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25239578 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25247770 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25247772 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25247774 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25255966 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25255968 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25255970 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25264162 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25264164 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25264166 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25272358 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25272360 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25272362 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25280554 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25280556 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25280558 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25288750 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25288752 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25288754 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25296946 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25296948 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25296950 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25305142 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25305144 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25305146 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25313338 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25313340 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25313342 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25321534 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25321536 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25321538 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25329730 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25329732 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25329734 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25337926 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25337928 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25337930 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25346122 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25346124 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25346126 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25354318 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25354320 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25354322 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25362514 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25362516 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25362518 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25370710 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25370712 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25370714 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25378906 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25378908 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25378910 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25387102 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25387104 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25387106 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25395298 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25395300 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25395302 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25403494 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25403496 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25403498 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25411690 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25411692 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25411694 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25419886 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25419888 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25419890 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25428082 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25428084 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25428086 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25436278 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25436280 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25436282 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25444474 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25444476 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25444478 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25452670 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25452672 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25452674 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25460866 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25460868 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25460870 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25469062 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25469064 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25469066 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25477258 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25477260 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25477262 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25485454 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25485456 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25485458 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25493650 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25493652 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25493654 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25501846 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25501848 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25501850 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25501852 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25501854 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25501856 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25510048 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25510050 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25510052 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25518244 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25518246 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25518248 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25526440 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25526442 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25534634 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25542826 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25542828 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25542830 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25551022 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25551024 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25551026 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25559218 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25559220 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25559222 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25567414 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25567416 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25567418 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25575610 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25575612 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25575614 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25583806 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25583808 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25583810 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25592002 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25592004 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25592006 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25600198 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25600200 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25600202 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25608394 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25608396 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25608398 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25616590 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25616592 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25616594 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25624786 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25624788 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25624790 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25632982 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25632984 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25632986 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25641178 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25641180 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25641182 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25649374 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25649376 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25649378 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25657570 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25657572 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25657574 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25665766 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25665768 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25665770 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25673962 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25673964 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25673966 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25682158 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25682160 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25682162 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25690354 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25690356 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25690358 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25698550 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25698552 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25698554 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25698556 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 25698558 } ], "md5sum": "a565ee54f0509be91e2ae031ef6743be" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 256, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2 }, { "name": "model.layers.0.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 4 }, { "name": "model.layers.0.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 6 }, { "name": "model.layers.1.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 8 }, { "name": "model.layers.1.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 10 }, { "name": "model.layers.1.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 12 }, { "name": "model.layers.1.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 14 }, { "name": "model.layers.2.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 16 }, { "name": "model.layers.2.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 18 }, { "name": "model.layers.2.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 20 }, { "name": "model.layers.2.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 22 }, { "name": "model.layers.3.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 24 }, { "name": "model.layers.3.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 26 }, { "name": "model.layers.3.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 28 }, { "name": "model.layers.3.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 30 }, { "name": "model.layers.4.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 32 }, { "name": "model.layers.4.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 34 }, { "name": "model.layers.4.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 36 }, { "name": "model.layers.4.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 38 }, { "name": "model.layers.5.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 40 }, { "name": "model.layers.5.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 42 }, { "name": "model.layers.5.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 44 }, { "name": "model.layers.5.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 46 }, { "name": "model.layers.6.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 48 }, { "name": "model.layers.6.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 50 }, { "name": "model.layers.6.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 52 }, { "name": "model.layers.6.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 54 }, { "name": "model.layers.7.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 56 }, { "name": "model.layers.7.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 58 }, { "name": "model.layers.7.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 60 }, { "name": "model.layers.7.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 62 }, { "name": "model.layers.8.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 64 }, { "name": "model.layers.8.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 66 }, { "name": "model.layers.8.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 68 }, { "name": "model.layers.8.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 70 }, { "name": "model.layers.9.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 72 }, { "name": "model.layers.9.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 74 }, { "name": "model.layers.9.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 76 }, { "name": "model.layers.9.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 78 }, { "name": "model.layers.10.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 80 }, { "name": "model.layers.10.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 82 }, { "name": "model.layers.10.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 84 }, { "name": "model.layers.10.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 86 }, { "name": "model.layers.11.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 88 }, { "name": "model.layers.11.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 90 }, { "name": "model.layers.11.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 92 }, { "name": "model.layers.11.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 94 }, { "name": "model.layers.12.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 96 }, { "name": "model.layers.12.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 98 }, { "name": "model.layers.12.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 100 }, { "name": "model.layers.12.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 102 }, { "name": "model.layers.13.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 104 }, { "name": "model.layers.13.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 106 }, { "name": "model.layers.13.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 108 }, { "name": "model.layers.13.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 110 }, { "name": "model.layers.14.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 112 }, { "name": "model.layers.14.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 114 }, { "name": "model.layers.14.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 116 }, { "name": "model.layers.14.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 118 }, { "name": "model.layers.15.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 120 }, { "name": "model.layers.15.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 122 }, { "name": "model.layers.15.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 124 }, { "name": "model.layers.15.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 126 }, { "name": "model.layers.16.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 128 }, { "name": "model.layers.16.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 130 }, { "name": "model.layers.16.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 132 }, { "name": "model.layers.16.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 134 }, { "name": "model.layers.17.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 136 }, { "name": "model.layers.17.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 138 }, { "name": "model.layers.17.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 140 }, { "name": "model.layers.17.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 142 }, { "name": "model.layers.18.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 144 }, { "name": "model.layers.18.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 146 }, { "name": "model.layers.18.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 148 }, { "name": "model.layers.18.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 150 }, { "name": "model.layers.19.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 152 }, { "name": "model.layers.19.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 154 }, { "name": "model.layers.19.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 156 }, { "name": "model.layers.19.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 158 }, { "name": "model.layers.20.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 160 }, { "name": "model.layers.20.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 162 }, { "name": "model.layers.20.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 164 }, { "name": "model.layers.20.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 166 }, { "name": "model.layers.21.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 168 }, { "name": "model.layers.21.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 170 }, { "name": "model.layers.21.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 172 }, { "name": "model.layers.21.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 174 }, { "name": "model.layers.22.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 176 }, { "name": "model.layers.22.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 178 }, { "name": "model.layers.22.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 180 }, { "name": "model.layers.22.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 182 }, { "name": "model.layers.23.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 184 }, { "name": "model.layers.23.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 186 }, { "name": "model.layers.23.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 188 }, { "name": "model.layers.23.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 190 }, { "name": "model.layers.24.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 192 }, { "name": "model.layers.24.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 194 }, { "name": "model.layers.24.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 196 }, { "name": "model.layers.24.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 198 }, { "name": "model.layers.25.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 200 }, { "name": "model.layers.25.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 202 }, { "name": "model.layers.25.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 204 }, { "name": "model.layers.25.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 206 }, { "name": "model.layers.26.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 208 }, { "name": "model.layers.26.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 210 }, { "name": "model.layers.26.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 212 }, { "name": "model.layers.26.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 214 }, { "name": "model.layers.27.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 216 }, { "name": "model.layers.27.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 218 }, { "name": "model.layers.27.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 220 }, { "name": "model.layers.27.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 222 }, { "name": "model.layers.28.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 224 }, { "name": "model.layers.28.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 226 }, { "name": "model.layers.28.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 228 }, { "name": "model.layers.28.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 230 }, { "name": "model.layers.29.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 232 }, { "name": "model.layers.29.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 234 }, { "name": "model.layers.29.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 236 }, { "name": "model.layers.29.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 238 }, { "name": "model.layers.30.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 240 }, { "name": "model.layers.30.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 242 }, { "name": "model.layers.30.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 244 }, { "name": "model.layers.30.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 246 }, { "name": "model.layers.31.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 248 }, { "name": "model.layers.31.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 250 }, { "name": "model.layers.31.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 252 }, { "name": "model.layers.31.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 254 } ], "md5sum": "3dd53907af219ff4adc62aa62c8700cf" } ] }