{ "metadata": { "ParamSize": 746, "ParamBytes": 108037385216.0, "BitsPerParam": 31.5064986120372 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 2819260416, "records": [ { "name": "language_model.model.embed_tokens.weight", "shape": [ 262208, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2819260416, "byteOffset": 0 } ], "md5sum": "9db8ab4739fbbccb2cb6855cdb173a48" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.0.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "525fc86770c2c1dfd57cbc66bc8fb435" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.0.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "73d7df5cd22325f68e8536527f6c87bc" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.0.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "60e495d83a82158ea2a7eab3d380ffd2" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.0.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4e8a168918ee3feaf3c4b9b6eb07f22d" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.0.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "542fdec345b58f30e97e35a615c3ecd6" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.1.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "c9df0c4356e570df2d82d35576240ce8" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.1.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "1e5d02fb93bd00867dc1727204255247" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.1.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "605025cfc534dc7b2f4eb30f6adf27d7" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.1.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "cfd960cc2494e6d5405c0619b770297c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.1.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3295bd1aa0fe848d2575821bd1c15589" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.1.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "e061c178964166ab3f0eb2bb515379e0" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.2.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "d2b6722fef2c4abfb941f1729fde8491" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.2.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "cc6a9617f7282d40bdf7fa29d55d014b" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.2.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "202dd37b5e92168248863d3017fd208e" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.2.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "96381bd07aa882fc79a624dd5241c9d4" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.2.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2af53721b9c9bee13716b541773103e6" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.2.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "93a711d1af357b5f09a60302e72fc1c3" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.3.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "b58d22e438da2808e350dcb1cef7ee06" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.3.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "0b25dfd7d47e1b58d00839b39bb22511" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.3.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "61e38e88109d280140af4cc089466581" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.3.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "7493a78c7df233dd12c49e97a55973a0" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.3.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d86926226a4e1bd0f039f73796e8f0d8" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.3.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "27c6c850b952f319ad783f24dba59f89" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.4.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "74f7650e4cc77e0ecad4c46ec73120b6" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.4.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "0a07c97fe646ac7b954338ade3bd2abe" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.4.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "72ba60b2ce8915df4bffb21b78e83893" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.4.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "c56dc92979fd82c0a2ca06aa17c456e6" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.4.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4a89ae09f6a5167a14ed3565feca3d42" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.4.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "048dad50c5f79948545675b23e0c2d58" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.5.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "28bb916cdac511be9bbcb10104ebab6d" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.5.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "7c4fc6cfcd19ba92e329205c7487c0ba" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.5.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "2024ae70d68a0ae79c1a577da90f3fd5" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.5.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "a46e97b17fa9d980107caa532a573031" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.5.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "23d3b110923d0d252af0610a2c57f7ed" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.5.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "43c0426c8ed1acfbb965cd9e62774a8f" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.6.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "0db13e05225efc0c978db64154ae6929" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.6.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "f228a2fc11f6e81c7a4e617ca4c16fe3" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.6.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "bcee9882da0f3ab7382ec850ef03c969" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.6.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4b12852b7c49699bd544e4ff3ec6a218" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.6.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "9c14e70bd2cd2fb23a5ea0dd62f0fa63" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.6.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3f6dd555da7db2858ce4784e1fa4bcd2" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.7.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "663cbfa567fedf3aa034a5431f8761fb" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.7.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "e80c24213fd40364f6ac75d1b089e614" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.7.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6c83281f394ac14b6dedc0d7275a491e" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.7.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2dfc9010a30b52f563a4b22bdf9eb941" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.7.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "b72c84204532d12df085cb4e83c98d99" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.10.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "6543c383f9d708af7e7963e6a67086e9" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.10.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "ece300ac24307f4d356ce41e74ea1159" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.10.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "449db1501d122edef9f7a62ed1d08059" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.10.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "27990155cb15d499ccea5724b8214966" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.10.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0ccd835f6869f2cb474093afff09ff73" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.10.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "53bd053ae7d11cd13df3a35cb106b705" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.11.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "7ec9337ce3f794e6267942bbdddd6f83" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.11.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "98e8936df67f43b4cc4d240899529b26" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.11.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "db24a0a743cd6981fab158491773e613" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.11.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "61bee7038062bc214bffcac584ad0ae4" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.11.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f1e72ac36873f31d7aacb01593d9ab35" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.11.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "74ddc3f49f2ebc57c8d21677172a2422" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.12.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "2d2d34fe562c1c4b3217b7bce563f625" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.12.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "0e6a642f5003063f8716b3bfa5afb36c" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.12.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "4d8f56196ab36e6e453115d810994a2f" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.12.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "01174c22ea62d461d4685ea29014b539" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.12.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "24dc0cc912ac34230f77863adc67025d" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.12.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "8dd24ca48d83d782951b984266f0a6e9" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.13.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "f7f9f2b4965fb16a85a50ad80d3fe9d4" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.13.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "e2a18ccb39d75bfc703316092c4348c0" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.13.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "dc9c910615fd2b0eeb1746c262cab7b9" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.13.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "aeed325363d1d7947699cf592c1c88b8" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.13.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "fa21c7b277cdde6e19f4368e9c9ec7eb" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.7.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "1b162f4b133b4f923f4f3d13bd4168da" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.8.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "05c479841e3ba089fc4968636dddf17f" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.8.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "b46acd034f202fb2346da49a2a275fa6" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.8.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "7f4a7b05471c85ee8ee721ce566a6edc" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.8.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "59fe3425f3748f50eeaedb428bf2fb69" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.8.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d8ae82a7ab6baeaf14d926c7e7c1cf6f" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.8.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "c907fd214c0adc4bc07996f86f5ae389" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.9.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "10cb5e70137cec1e278c379a9b7d71c7" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.9.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "87c72e0b0531ed2f7e0956a398913b2c" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.9.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "8b1e5ffb2bd855995d4db0d41434182e" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.9.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "cb6e67ff98a52b1382d5804cc9a23fc2" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.9.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "7b881ac328ff29fb4cd42dd2816b550b" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.9.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "a1402e8f5aeb1098fb532287a6c60ada" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.13.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "ce26665a542ccd8728ccd5c1e151b4b0" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.14.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "3ac9d18c0b6a969979fe03dcee922dcb" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.14.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "83e11efba4ce4c9abcbedc93828afecd" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.14.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "a3a7c439becec2d2d21ec6d98ad36e69" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.14.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "c3e09250c59c417a510423d2385a25e9" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.14.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f17aa0fb82b3e0a91ce065c56992a8cd" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.14.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "7c58d43faacb35f1fe3f38ebaa4af09c" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.15.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "c066d755a177a9f8a09f50d10fcf3c4e" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "2e0d1b05ed5168a9911f4ef024b619c0" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.15.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3e89accfdc75b39d4742e769c57d9917" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.15.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "cdf9ff981a084f3a17c712e1303297c9" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.15.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6dd1ed55f0469a10fe1b86efb948787b" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.15.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "309c986aaaf64b01f23fe087adab56a6" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.16.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "c298b583fd0e6f5a42bff2093e082f77" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.16.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "7d9655333609a3eef1aae02ba8ed9745" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.16.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "1253ee38620aebf013772ffb9cdd0728" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.16.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "8b2b287605e4b4d7df18a7b62a41102e" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.16.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d01aa66d27ff0945296718ae1382d211" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.16.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "c9728ff49f18d9d5aea35e5c70d26233" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.17.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "fc660f4240c7f51056748502baac05b0" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.17.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "d2a0016359f4122f7dc5a029c4cdbed7" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.17.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "5c93cee987e87fa46510bfffd7304e58" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.17.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ec90247544470a2d0747397210e2e70a" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.17.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "93502d845b53da9e94f2fd3a87b79d2d" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.17.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "dbaa4c974e865d09eb19100de817fead" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.18.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "66522f3ee2f5ebbccd3be6cd748ea3e4" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.18.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "a0c3b8d9a54724869200dee707bd5a46" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.18.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "d9f757df4d48d1595139c007be3f139e" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.18.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "dbf83b1676312570494ba169c9c159d1" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.18.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "15a6b51b07d9660411af1f45ee63a3e9" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.18.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "9a086d27456b2ab951c6f681ac6dae30" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.19.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "b84fa017d1c99095b988f9d11d76b9ba" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.19.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "1b180d88cacceb5e103f3ab5c4a0b4c0" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.19.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "caadabe77eb70990cbad0e1c3333cbeb" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.19.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "58b199e0255efdb13e3b08aa67939e67" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.19.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "d914f3d40926473c5eb49b3c38edc8a3" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.19.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "54d0f7ab0c6d0b3d170c0caeead2f796" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.20.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "9799985f00abdb9c2d40fa49e7033b57" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.20.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "c0aade48379c94f4f250c223bbbbafab" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.20.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "28c56307fb4e8058bb622444dc30eb95" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.20.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0d89d786451bf31a85170b7c4cb06108" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.20.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "20b226d71fb6fc51dcc2ba8353bbdf74" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.20.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "9c3b9b684504a34499f9c70d6de28c53" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.21.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "1b5e2d740d7afa2b03d3eb18641e16ac" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.21.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "3a7ed5d6755afa6661c67a0346f963c5" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.21.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "0f8d9db25e0b0d5f06364e3d34d42b0e" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.21.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "3809de0bf8fa5668d09741b45746b3ec" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.21.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "139ace6ca9122d462f9ee61461ec77de" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.21.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "1e47c265c8fea348de9ec378ee5b54b1" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.22.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "8858ce6e5a09c84fe139b831154883ce" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.22.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "8f3ddb7652eb609343b8c69143603143" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.22.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "f3f51c08bc5a14cc6621e14885262406" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.22.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "e6319eab95e0a155511bfdc6870d3563" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.22.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "48beff52a3ce052b1b96e3ad1b44ab54" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.22.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "4c0108b648640ccefd3ec84568d06241" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.23.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "10b69956b8936c2fb398263b6a3204a8" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.23.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "fed326d8d93fa618daf1fdde980d8d1c" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.23.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "627724b07978af1fbffc500482b996b6" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.23.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b032ee45f47d6078667f77d30a5df8ec" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.23.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d7ee2b26f1ef82a3fcee30c89b76aa23" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.23.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "7595e13dfcad27df179c0c327d79ce09" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.24.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "4055217607959407f5e10262e4919de1" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.24.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "98486a7cb728eb866c70a60e9c841f20" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.24.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "75f690edb7ef8707993d156a212ed943" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.24.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "13c08b290fe6dc16f47079469f10bb32" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.24.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4caa30fefdb36185a0c81af891d2539f" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.24.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "8480071dea961caf0a12b360e58c867d" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.25.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "9a321dc09700c7ec2f17e96acbdf6958" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.25.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "1c801d1145a81654636e64f025d8a261" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.25.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "daee189698f1085d78a8a5cac6291a07" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.25.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "2ed89537e1091c8773fbdfbf96cb507e" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.25.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "392dd9638099abdc6ec4e40ef02b201d" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.25.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "0321ed4d141d7e2ce063d4daf02c05f7" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.26.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "caa9929debb0a9f6b3f994ec902c5e5a" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.26.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "c6778bd43069b009f8f9fabc9eeeadb7" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.26.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "6b7ad6a017622597bfb102d9f250f72e" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.26.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4ea2fcd353f719f33c1e8fcad13cefe8" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.26.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "221aef305f36681aea804091a21f7e0c" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.26.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3ff56bd121cd8006cd18a7209b1a0ede" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.27.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "1b602776802de78eeb719c04abfbb212" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.27.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "8abbbdd1513032f2b9990d7228fd7af9" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.27.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "77f9efcca45b9b12afd4a3784b39b814" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.27.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b6dacac406698fde51ebd963e57b2d4f" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.27.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6e74312465f6bb89ea94fac1311d3240" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.27.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "4feff801f8633c3d6960f82029d63b6f" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.28.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "fc1256bb8e61b47fd00a727f9a042f74" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.28.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "33c9ec7cab154e617175ad0fbe91c65f" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.28.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "6b7ce422d2b030de9a3c2262ae1f6b89" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.28.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "84007ecbe9e53e0de3f9a5037b98cfa5" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.28.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "c76948f9dd22f67dcedd2e1dcbf382e1" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.28.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "075f438aeeb3f8a7094efae18996110b" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.29.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "2fcf5b0bb799f6bf13becda06ae06841" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.29.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "6bca3cd6f3f44171384c37b7f8e162d3" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.29.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "78ecbfd4f7e30667355f1f9ef9322259" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.29.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f3f0ca333ab807828d57425b9252a594" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.29.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "350eafc807b900835d9f07489a7b633c" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.29.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "a1485335cfb49be794e53d6b618bb4f4" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.30.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "b69ef216c4395e42f204a559e24c8258" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.30.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "09a5023ee7064f9f5d6e546c6f39392a" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.30.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "753ffaf2a2391b76e376a3e3aba1d1f0" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.30.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "08cd100b06600f8c2ad1dc2bef22c29a" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.30.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4138944efcd82635619b42d210dd6583" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.30.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "952fcb9fb70f0fe1d6bc3ad91aa52713" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.31.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "a8de273bcd100bd99232358eded06b13" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.31.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "5a996be6be4860df65b9c3afb0a615d7" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.31.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "3c66dce37c5f4e126912abfe8e4b9ffe" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.31.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4df56c71b50767549c43480afbb316cf" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.31.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "69d6110a780fa2645fe0823e37f4b06f" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.31.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "90fe9a351094f355e322fcc3a04aca3e" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.32.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "97f0d93a0f4e737ec9df3f86f0803d58" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.32.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "084f5b152520afebbbcf1a8f22881c07" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.32.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3fbbc9b07802e8c88abbf38f830472b6" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.32.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b3bb719fde10642b0e8cf8b59d9a453d" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.32.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "655c754845f5537da4abfe0a28716b9f" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.32.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "bb568b08ad78b65e5c911d2c41c151a4" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.33.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "aa2d84b9baf3351572de35abbba0b568" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.33.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "639fcbafeea79891ad952f2b32df2380" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.33.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "aa9715ca162dde2902eacb2cca70e92f" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.33.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "3e9f3e7420b9d517f85231fc5eca28fe" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.33.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f494c6801f82fc9e2b922d2cfa5431e4" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.33.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "cdc03983f358d56301536296ad80f19c" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.34.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "6ee7233a7868abaf572d87a5689c7771" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.34.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "a579ddc61de21f5a6181e6ac6d38be3a" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.34.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "79a46dcaf135439fa3ba570b83021c13" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.34.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "842343dd6da3713548002dfb750abf33" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.34.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "3dd0edf60a548a845e9418711ca3cb4d" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.34.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "0f73541485dae4510502212dd92f8b64" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.35.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "0b2f9848089b98bb6edded1111009ec9" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.35.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "aed2854fe62122531e0b252e86255ffd" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.35.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "eb6aba8997dedc363d4968525a87cbc7" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.35.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "903a23007fc7b8449592b17faec5a035" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.35.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "99e52e7d174601ae06c09481af4aa1af" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.35.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "caf34895046736ba5678e67b69a0306d" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.36.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "5e6afcff5bde0ab64f486080b6f9492c" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.36.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "f324fd9669c3594463de8ab2e63276eb" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.36.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "7ed85b58401abbc1e1c3122bfd3df7ea" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.36.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "90c58356e1ebca66e1b8d5113694793e" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.36.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "5ed40db60ee89fbd16303b1e7e152e60" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.36.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "6811e8d91a307278c8018464c73337d8" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.37.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "4acc08ffdf17838b5dbb5fe5ca9b1791" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.37.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "6ae03f31b394443cfe6ad8cff9024721" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.37.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "772bdcc32bee1a152154b77d76d1b14e" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.37.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f6ad1652f99cee122fb5567e60d96308" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.37.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "a476ce510cab9de736b9ba7631c53869" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.37.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "08bda377b65cbd5e13aad999746988a8" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.38.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "c594e5100c3861601cbab39af15b5710" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.38.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "1a12dbe2332420324e483faa38ebdad4" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.38.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "8dd14761fd6f8b79d14a0384a85af179" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.38.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b271e09758b01c948e6b3d8876211750" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.38.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "22b285555417a2bd3b0db2da17b0fd26" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.38.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "dc1c3b1747775eee05407863728e0f30" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.39.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "563a43570dbb8e566ea77c0a50cdf025" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.39.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "93b0ad783dfe2de502ec2f6bd4811f5c" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.39.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "945a946249307a8f15022d654d1c6a16" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.39.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0a606433704495f32764e8ec62021043" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.39.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d4e6b670503645294cbb17b64ee0b048" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.39.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "cefa69e5d80f02b1f4d6a52f22ce0f6d" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.40.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "487ac41ce845def31393927de1bd0356" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.40.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "321d357876415f86f200205d7bcd65a0" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.40.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "9f1ed64a0c6a0979aff085b2930451b4" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.40.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ec3f800cfbd7c6d5185e9a5cd310c18e" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.40.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "df93963a6f66a69d37d747c648264ddb" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.40.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "263a081fa872728895a72729f1af8d5f" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.41.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "f2942f151a98dbd179da6bf5c9ecb1ab" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.41.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "29161228570c28d6c0d779bd6a68d344" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.41.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ed83c6a7679797d5d7f02b8a8af1a472" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.41.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "64bd81e5358736230620296e25109d56" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.41.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0163ed5fbecd8b78aa258a43eb9046c6" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.41.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ccc5c5b461c98b9327b8a22730fecbf7" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.42.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "fe7076688577ab9872aecfc50255aac6" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.42.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "31a1aa293c7eb02fe403cf3f318630a6" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.42.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "9477d3c8ec0716f777332eeb29b2339c" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.42.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ca9bc850e95136bcd2a9df4cac8c6500" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.42.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f08e379b1b91e7c0c9e0634d776a641b" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.42.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "72557b2eac5b951f71e524b179c3d884" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.43.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "93937c9f141472e3bde0d515fb24b778" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.43.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "27e335cf3151a07b2272a093e56f13c6" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.43.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b71a50bcdb3350e8eabf68de1be56ac5" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.43.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "a7d481455a955c4fd60f652a2672379b" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.43.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "60f66ab858cf972047cc69ed95437c89" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.43.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "19c951020957ea52da9064bf5d1b1beb" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.44.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "024c75324cc3d57e54b1b824ebbe1739" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.44.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "ea40c196888c318e3dc88a7ec5d1a4ed" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.44.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "21ff7755816281f12769a055f20cc348" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.44.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f377d0c2bd740cd180e7ad10fafc137e" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.44.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "98ba9e171c649533b69fcfa55865b00e" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.44.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "608852e06215293c07a18426265f3550" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.45.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "16c75a10c4ae446bae4694f2bdbb0b01" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.45.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "f985095d0872d9601862d29ed8a937ef" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.45.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "b0c22adc137dd01910999d2e79f950e9" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.45.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "145e834de39247b3e77fc92ad0f183ae" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.45.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "4a1f90c0f521e987fd46d645023f85e9" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.45.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "33eed963fe8e9d328ad97941de5c4307" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.46.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "839225a3dcdbf268873f93bc6de4d709" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.46.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "5d401d09d804be825901813e1d8f624a" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.46.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "66f83e66469bfd5abd1d14e2ef85c5de" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.46.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "e3e0bfcec4c783d180b8bd55be8c3b4a" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.46.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "280d6c978fc3cf2f3c90cfdfabf7bc9a" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.46.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "6b8a8979b086362bb54e9491706f1e8c" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.47.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "6ef5d55080c0fcfc858b85ae1bb4fe71" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.47.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "988e6dd4b1437a20588125d091df66d5" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.47.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "83f3d6031a2117d5cc7d13dbaf013134" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.47.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6e03d73ce895f85dadc128f759b713ee" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.47.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "91e8511f2759ec1a9d8a8b85e0f60db1" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.47.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ba7c1b765b964f76adb93bd59601a139" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.48.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "9b2f6f309c4ff81f5fce52cc4d4efe3a" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.48.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "7a25006b0dcc975d566516e2510914b4" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.48.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "49bf7b6fd54f58cf48d7a45437589a43" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.48.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "b45b6f6e603b8f79745e25f771c9b4b9" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.48.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ba4d2b427c2558bdab23eb275f4d24ef" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.48.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "d7c5b260b3fc2a36eb061eb676abc2af" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.49.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "765f336eaf9db7d84695f1b27af5b8d6" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.49.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "708facddf031faa14470d487fb8fa54c" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.49.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "169e8c71f60def0609d677e7c4a3316c" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.49.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "144180921027cc0ef07e5cda42355b81" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.49.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "8fcebc0a25ecaeaa58bc5d76ede10df6" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.49.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "ec4723d2ed763c3b459edb7f6d404a64" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.50.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "a83f50536d8ac9328ea0336987d5c6e8" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.50.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "e2a15f67ea3b0f0637cc0592219c9a11" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.50.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "d30322d17fb2cbed4c9270708f0a7bbb" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.50.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "a428ae82868587076a3a715c81df4599" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.50.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "ba69f6d3ccca513ed3f2f175203ee8cd" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.50.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "52062418c8de7d53a917880689b5f5ff" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.51.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "06b79d56d0ad98553730489092ca2fbe" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.51.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "092529b2af513f960e7ebb4c64ead70d" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.51.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "a923096e4b2ff0b24c952e05ac9906ed" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.51.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "17cb8d60e03e8098947767549f7f96d1" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.51.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f43c832e8a1af5b6c08c8df7a519d12e" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.51.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ea7753144e764c3ead071d82f8718201" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.52.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "8b72bfa8253bcecf9b70ecd7f55e3eeb" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.52.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "8e8f0002a526b683bad22ca81a92ffd6" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.52.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3f57f322c8db7cfcf838f9ee4da2777d" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.52.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "dd9a5361d3381e0239c8862daa2a776d" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.52.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0afea4559fb2bc1c8bb0ada806238b3c" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.52.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "71612d0a63b04221d375f69a59053474" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.53.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "0829779b7f1462077909bcf85d3dab7d" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.53.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "f6cf2763a62a47d02ec8e37e2d2352ab" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.53.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "b4adbc76ceb9e9fbaf78676688f3cfc1" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.53.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "a7d4e7345fc5ab1ccd1da40c5b43ead3" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.53.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "9429bf2f1d77c7a769132831cfd5da74" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.53.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "9ba679a6b350505081c7e5d792e5204c" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.54.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "a39451ef52e787587f5924ab58f5493d" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.54.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "b0ddc17d43ec4ce5c357c97c306ff53a" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.54.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "b1c416eeb1512702cf0256c441003c07" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.54.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "e0ee88a7ab9cd03d904063571e38542b" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.54.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d1e887b5af17dc5948c23f4cc95981e9" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.54.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "ccc0d71880ed25ef6823fcd8d297e5d1" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.55.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "31a54f3fbfdca781be13dd6238f4d7a6" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.55.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "205c6a8fb59ef20d609ddd3a816279c9" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.55.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "fc695ff4b3d5660f6c829f92a2abf7ac" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.55.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "fe128fc174744ce0a338b54140d0d5bd" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.55.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "80aa9d8cccd1e8c82d0d39ba58c7e83f" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.55.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "8423fe95f70c0e5635f69f804e7fdbba" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.56.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "cdb857c2ca3a225a9698f47f50f5477f" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.56.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "56e86a5a03d3ecb163f1935f9c9ac36e" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.56.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "9012d2d3a34e872b5a346d6adecd37fa" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.56.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "6cdebc0812557695ec6e1e43baa2926e" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.56.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "974a10889dc290f03cd2b44966686f54" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.56.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "4684ec332246df3dc21b0e783bc129b7" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.57.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "5b4fbb84e25b940eddc62e1945734cbc" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.57.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "5276a0f5ff1ea371e17e6a9dd5d5fa8b" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.57.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3706f904af696642a60a37544b77bd41" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.57.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "8705b8034e489b28e32234ea90115344" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.57.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "e4c81930e56ab07413b11ac89118dc7a" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.57.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "3202ff811c56a803f509fb85ef72f042" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.58.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "6f4599b40ef9e96f4c642b0d18788ecc" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.58.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "3d98121d54a5f76ef4de1818d192ef98" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.58.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "c1537f8418da0df05b0dec02b19ee3fe" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.58.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "41f9946a954c8bed3b193196e976a06f" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.58.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "9613f7050256178e7cfc8f37299f4242" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.58.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "a666fb5b8c6a2811470c0cb8b3ba1ec2" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.59.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "1cd2669b7f85cfc0173f34b7892ae4b3" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.59.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "6fe6d3ac0c7cb1cf5b3c475be61d6b38" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.59.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "e8a5f0c8297b34a8ae41bb4846ceeb34" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.59.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0a8c025fbd6f9a1b7004f3a340948831" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.59.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "0fc3a33c004e6e25a3d55c9548cebf99" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.59.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "45a626edd005f4fa5f08828b16dda6d7" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.60.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "d42593903c3186aa50e2a8e3eecff67b" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.60.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "f41ac05cc4865f2998f13ba540d30eb2" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.60.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "33fcb1c3360be2458262a46a465dd4d2" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.60.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "d5d87bf6aacdb86ae72c0f3f2ab24136" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.60.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "436595d419f45d48a22016897a979a00" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.60.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "0dd8c81f34719cea43ccd5eede71862a" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 462422016, "records": [ { "name": "language_model.model.layers.61.mlp.gate_up_proj.weight", "shape": [ 43008, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 462422016, "byteOffset": 0 } ], "md5sum": "97ad4d9aac8a0250c0b9a1ac8c62031f" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.61.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "570f4ea330519b548a04f68c6101daab" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.61.self_attn.o_proj.weight", "shape": [ 5376, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "f5f359deb7a484ea360f7dff5588296e" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 44040192, "records": [ { "name": "language_model.model.layers.61.self_attn.q_proj.weight", "shape": [ 4096, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 44040192, "byteOffset": 0 } ], "md5sum": "86f0b1c102ce5850213528066146e448" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "language_model.model.layers.61.self_attn.v_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 0 } ], "md5sum": "b1d85991e269b59d4b9c308c4115258d" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 231211008, "records": [ { "name": "language_model.model.layers.61.mlp.down_proj.weight", "shape": [ 5376, 21504 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 231211008, "byteOffset": 0 } ], "md5sum": "0c508bb9081f12affe33cccd2d72c033" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 24729088, "records": [ { "name": "language_model.model.layers.0.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 0 }, { "name": "language_model.model.layers.0.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 10752 }, { "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 21504 }, { "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 32256 }, { "name": "language_model.model.layers.0.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 43008 }, { "name": "language_model.model.layers.0.self_attn.k_proj.weight", "shape": [ 2048, 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 22020096, "byteOffset": 43264 }, { "name": "language_model.model.layers.0.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22063360 }, { "name": "language_model.model.layers.1.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22063616 }, { "name": "language_model.model.layers.1.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22063872 }, { "name": "language_model.model.layers.1.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22064128 }, { "name": "language_model.model.layers.1.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22074880 }, { "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22085632 }, { "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22096384 }, { "name": "language_model.model.layers.2.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22107136 }, { "name": "language_model.model.layers.2.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22117888 }, { "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22128640 }, { "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22139392 }, { "name": "language_model.model.layers.2.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22150144 }, { "name": "language_model.model.layers.2.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22150400 }, { "name": "language_model.model.layers.3.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22150656 }, { "name": "language_model.model.layers.3.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22161408 }, { "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22172160 }, { "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22182912 }, { "name": "language_model.model.layers.3.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22193664 }, { "name": "language_model.model.layers.3.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22193920 }, { "name": "language_model.model.layers.4.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22194176 }, { "name": "language_model.model.layers.4.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22204928 }, { "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22215680 }, { "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22226432 }, { "name": "language_model.model.layers.4.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22237184 }, { "name": "language_model.model.layers.4.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22237440 }, { "name": "language_model.model.layers.5.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22237696 }, { "name": "language_model.model.layers.5.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22248448 }, { "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22259200 }, { "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22269952 }, { "name": "language_model.model.layers.5.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22280704 }, { "name": "language_model.model.layers.5.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22280960 }, { "name": "language_model.model.layers.6.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22281216 }, { "name": "language_model.model.layers.6.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22291968 }, { "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22302720 }, { "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22313472 }, { "name": "language_model.model.layers.6.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22324224 }, { "name": "language_model.model.layers.6.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22324480 }, { "name": "language_model.model.layers.7.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22324736 }, { "name": "language_model.model.layers.7.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22324992 }, { "name": "language_model.model.layers.10.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22325248 }, { "name": "language_model.model.layers.10.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22336000 }, { "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22346752 }, { "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22357504 }, { "name": "language_model.model.layers.10.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22368256 }, { "name": "language_model.model.layers.10.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22368512 }, { "name": "language_model.model.layers.11.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22368768 }, { "name": "language_model.model.layers.11.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22379520 }, { "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22390272 }, { "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22401024 }, { "name": "language_model.model.layers.11.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22411776 }, { "name": "language_model.model.layers.11.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22412032 }, { "name": "language_model.model.layers.12.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22412288 }, { "name": "language_model.model.layers.12.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22423040 }, { "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22433792 }, { "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22444544 }, { "name": "language_model.model.layers.12.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22455296 }, { "name": "language_model.model.layers.12.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22455552 }, { "name": "language_model.model.layers.13.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22455808 }, { "name": "language_model.model.layers.13.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22456064 }, { "name": "language_model.model.layers.7.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22456320 }, { "name": "language_model.model.layers.7.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22467072 }, { "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22477824 }, { "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22488576 }, { "name": "language_model.model.layers.8.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22499328 }, { "name": "language_model.model.layers.8.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22510080 }, { "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22520832 }, { "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22531584 }, { "name": "language_model.model.layers.8.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22542336 }, { "name": "language_model.model.layers.8.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22542592 }, { "name": "language_model.model.layers.9.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22542848 }, { "name": "language_model.model.layers.9.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22553600 }, { "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22564352 }, { "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22575104 }, { "name": "language_model.model.layers.9.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22585856 }, { "name": "language_model.model.layers.9.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22586112 }, { "name": "language_model.model.layers.13.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22586368 }, { "name": "language_model.model.layers.13.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22597120 }, { "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22607872 }, { "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22618624 }, { "name": "language_model.model.layers.14.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22629376 }, { "name": "language_model.model.layers.14.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22640128 }, { "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22650880 }, { "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22661632 }, { "name": "language_model.model.layers.14.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22672384 }, { "name": "language_model.model.layers.14.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22672640 }, { "name": "language_model.model.layers.15.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22672896 }, { "name": "language_model.model.layers.15.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22683648 }, { "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22694400 }, { "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22705152 }, { "name": "language_model.model.layers.15.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22715904 }, { "name": "language_model.model.layers.15.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22716160 }, { "name": "language_model.model.layers.16.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22716416 }, { "name": "language_model.model.layers.16.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22727168 }, { "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22737920 }, { "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22748672 }, { "name": "language_model.model.layers.16.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22759424 }, { "name": "language_model.model.layers.16.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22759680 }, { "name": "language_model.model.layers.17.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22759936 }, { "name": "language_model.model.layers.17.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22770688 }, { "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22781440 }, { "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22792192 }, { "name": "language_model.model.layers.17.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22802944 }, { "name": "language_model.model.layers.17.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22803200 }, { "name": "language_model.model.layers.18.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22803456 }, { "name": "language_model.model.layers.18.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22814208 }, { "name": "language_model.model.layers.18.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22824960 }, { "name": "language_model.model.layers.18.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22835712 }, { "name": "language_model.model.layers.18.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22846464 }, { "name": "language_model.model.layers.18.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22846720 }, { "name": "language_model.model.layers.19.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22846976 }, { "name": "language_model.model.layers.19.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22847232 }, { "name": "language_model.model.layers.19.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22847488 }, { "name": "language_model.model.layers.19.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22858240 }, { "name": "language_model.model.layers.19.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22868992 }, { "name": "language_model.model.layers.19.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22879744 }, { "name": "language_model.model.layers.20.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22890496 }, { "name": "language_model.model.layers.20.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22901248 }, { "name": "language_model.model.layers.20.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22912000 }, { "name": "language_model.model.layers.20.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22922752 }, { "name": "language_model.model.layers.20.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22933504 }, { "name": "language_model.model.layers.20.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22933760 }, { "name": "language_model.model.layers.21.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22934016 }, { "name": "language_model.model.layers.21.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22944768 }, { "name": "language_model.model.layers.21.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22955520 }, { "name": "language_model.model.layers.21.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22966272 }, { "name": "language_model.model.layers.21.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22977024 }, { "name": "language_model.model.layers.21.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22977280 }, { "name": "language_model.model.layers.22.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22977536 }, { "name": "language_model.model.layers.22.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22988288 }, { "name": "language_model.model.layers.22.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 22999040 }, { "name": "language_model.model.layers.22.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23009792 }, { "name": "language_model.model.layers.22.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23020544 }, { "name": "language_model.model.layers.22.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23020800 }, { "name": "language_model.model.layers.23.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23021056 }, { "name": "language_model.model.layers.23.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23031808 }, { "name": "language_model.model.layers.23.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23042560 }, { "name": "language_model.model.layers.23.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23053312 }, { "name": "language_model.model.layers.23.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23064064 }, { "name": "language_model.model.layers.23.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23064320 }, { "name": "language_model.model.layers.24.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23064576 }, { "name": "language_model.model.layers.24.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23075328 }, { "name": "language_model.model.layers.24.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23086080 }, { "name": "language_model.model.layers.24.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23096832 }, { "name": "language_model.model.layers.24.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23107584 }, { "name": "language_model.model.layers.24.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23107840 }, { "name": "language_model.model.layers.25.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23108096 }, { "name": "language_model.model.layers.25.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23108352 }, { "name": "language_model.model.layers.25.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23108608 }, { "name": "language_model.model.layers.25.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23119360 }, { "name": "language_model.model.layers.25.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23130112 }, { "name": "language_model.model.layers.25.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23140864 }, { "name": "language_model.model.layers.26.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23151616 }, { "name": "language_model.model.layers.26.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23162368 }, { "name": "language_model.model.layers.26.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23173120 }, { "name": "language_model.model.layers.26.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23183872 }, { "name": "language_model.model.layers.26.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23194624 }, { "name": "language_model.model.layers.26.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23194880 }, { "name": "language_model.model.layers.27.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23195136 }, { "name": "language_model.model.layers.27.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23205888 }, { "name": "language_model.model.layers.27.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23216640 }, { "name": "language_model.model.layers.27.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23227392 }, { "name": "language_model.model.layers.27.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23238144 }, { "name": "language_model.model.layers.27.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23238400 }, { "name": "language_model.model.layers.28.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23238656 }, { "name": "language_model.model.layers.28.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23249408 }, { "name": "language_model.model.layers.28.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23260160 }, { "name": "language_model.model.layers.28.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23270912 }, { "name": "language_model.model.layers.28.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23281664 }, { "name": "language_model.model.layers.28.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23281920 }, { "name": "language_model.model.layers.29.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23282176 }, { "name": "language_model.model.layers.29.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23292928 }, { "name": "language_model.model.layers.29.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23303680 }, { "name": "language_model.model.layers.29.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23314432 }, { "name": "language_model.model.layers.29.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23325184 }, { "name": "language_model.model.layers.29.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23325440 }, { "name": "language_model.model.layers.30.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23325696 }, { "name": "language_model.model.layers.30.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23336448 }, { "name": "language_model.model.layers.30.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23347200 }, { "name": "language_model.model.layers.30.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23357952 }, { "name": "language_model.model.layers.30.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23368704 }, { "name": "language_model.model.layers.30.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23368960 }, { "name": "language_model.model.layers.31.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23369216 }, { "name": "language_model.model.layers.31.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23369472 }, { "name": "language_model.model.layers.31.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23369728 }, { "name": "language_model.model.layers.31.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23380480 }, { "name": "language_model.model.layers.31.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23391232 }, { "name": "language_model.model.layers.31.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23401984 }, { "name": "language_model.model.layers.32.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23412736 }, { "name": "language_model.model.layers.32.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23423488 }, { "name": "language_model.model.layers.32.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23434240 }, { "name": "language_model.model.layers.32.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23444992 }, { "name": "language_model.model.layers.32.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23455744 }, { "name": "language_model.model.layers.32.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23456000 }, { "name": "language_model.model.layers.33.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23456256 }, { "name": "language_model.model.layers.33.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23467008 }, { "name": "language_model.model.layers.33.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23477760 }, { "name": "language_model.model.layers.33.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23488512 }, { "name": "language_model.model.layers.33.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23499264 }, { "name": "language_model.model.layers.33.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23499520 }, { "name": "language_model.model.layers.34.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23499776 }, { "name": "language_model.model.layers.34.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23510528 }, { "name": "language_model.model.layers.34.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23521280 }, { "name": "language_model.model.layers.34.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23532032 }, { "name": "language_model.model.layers.34.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23542784 }, { "name": "language_model.model.layers.34.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23543040 }, { "name": "language_model.model.layers.35.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23543296 }, { "name": "language_model.model.layers.35.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23554048 }, { "name": "language_model.model.layers.35.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23564800 }, { "name": "language_model.model.layers.35.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23575552 }, { "name": "language_model.model.layers.35.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23586304 }, { "name": "language_model.model.layers.35.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23586560 }, { "name": "language_model.model.layers.36.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23586816 }, { "name": "language_model.model.layers.36.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23597568 }, { "name": "language_model.model.layers.36.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23608320 }, { "name": "language_model.model.layers.36.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23619072 }, { "name": "language_model.model.layers.36.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23629824 }, { "name": "language_model.model.layers.36.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23630080 }, { "name": "language_model.model.layers.37.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23630336 }, { "name": "language_model.model.layers.37.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23630592 }, { "name": "language_model.model.layers.37.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23630848 }, { "name": "language_model.model.layers.37.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23641600 }, { "name": "language_model.model.layers.37.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23652352 }, { "name": "language_model.model.layers.37.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23663104 }, { "name": "language_model.model.layers.38.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23673856 }, { "name": "language_model.model.layers.38.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23684608 }, { "name": "language_model.model.layers.38.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23695360 }, { "name": "language_model.model.layers.38.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23706112 }, { "name": "language_model.model.layers.38.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23716864 }, { "name": "language_model.model.layers.38.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23717120 }, { "name": "language_model.model.layers.39.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23717376 }, { "name": "language_model.model.layers.39.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23728128 }, { "name": "language_model.model.layers.39.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23738880 }, { "name": "language_model.model.layers.39.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23749632 }, { "name": "language_model.model.layers.39.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23760384 }, { "name": "language_model.model.layers.39.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23760640 }, { "name": "language_model.model.layers.40.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23760896 }, { "name": "language_model.model.layers.40.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23771648 }, { "name": "language_model.model.layers.40.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23782400 }, { "name": "language_model.model.layers.40.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23793152 }, { "name": "language_model.model.layers.40.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23803904 }, { "name": "language_model.model.layers.40.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23804160 }, { "name": "language_model.model.layers.41.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23804416 }, { "name": "language_model.model.layers.41.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23815168 }, { "name": "language_model.model.layers.41.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23825920 }, { "name": "language_model.model.layers.41.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23836672 }, { "name": "language_model.model.layers.41.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23847424 }, { "name": "language_model.model.layers.41.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23847680 }, { "name": "language_model.model.layers.42.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23847936 }, { "name": "language_model.model.layers.42.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23858688 }, { "name": "language_model.model.layers.42.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23869440 }, { "name": "language_model.model.layers.42.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23880192 }, { "name": "language_model.model.layers.42.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23890944 }, { "name": "language_model.model.layers.42.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23891200 }, { "name": "language_model.model.layers.43.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23891456 }, { "name": "language_model.model.layers.43.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23891712 }, { "name": "language_model.model.layers.43.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23891968 }, { "name": "language_model.model.layers.43.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23902720 }, { "name": "language_model.model.layers.43.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23913472 }, { "name": "language_model.model.layers.43.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23924224 }, { "name": "language_model.model.layers.44.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23934976 }, { "name": "language_model.model.layers.44.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23945728 }, { "name": "language_model.model.layers.44.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23956480 }, { "name": "language_model.model.layers.44.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23967232 }, { "name": "language_model.model.layers.44.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23977984 }, { "name": "language_model.model.layers.44.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23978240 }, { "name": "language_model.model.layers.45.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23978496 }, { "name": "language_model.model.layers.45.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 23989248 }, { "name": "language_model.model.layers.45.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24000000 }, { "name": "language_model.model.layers.45.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24010752 }, { "name": "language_model.model.layers.45.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24021504 }, { "name": "language_model.model.layers.45.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24021760 }, { "name": "language_model.model.layers.46.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24022016 }, { "name": "language_model.model.layers.46.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24032768 }, { "name": "language_model.model.layers.46.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24043520 }, { "name": "language_model.model.layers.46.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24054272 }, { "name": "language_model.model.layers.46.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24065024 }, { "name": "language_model.model.layers.46.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24065280 }, { "name": "language_model.model.layers.47.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24065536 }, { "name": "language_model.model.layers.47.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24076288 }, { "name": "language_model.model.layers.47.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24087040 }, { "name": "language_model.model.layers.47.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24097792 }, { "name": "language_model.model.layers.47.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24108544 }, { "name": "language_model.model.layers.47.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24108800 }, { "name": "language_model.model.layers.48.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24109056 }, { "name": "language_model.model.layers.48.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24119808 }, { "name": "language_model.model.layers.48.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24130560 }, { "name": "language_model.model.layers.48.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24141312 }, { "name": "language_model.model.layers.48.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24152064 }, { "name": "language_model.model.layers.48.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24152320 }, { "name": "language_model.model.layers.49.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24152576 }, { "name": "language_model.model.layers.49.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24152832 }, { "name": "language_model.model.layers.49.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24153088 }, { "name": "language_model.model.layers.49.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24163840 }, { "name": "language_model.model.layers.49.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24174592 }, { "name": "language_model.model.layers.49.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24185344 }, { "name": "language_model.model.layers.50.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24196096 }, { "name": "language_model.model.layers.50.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24206848 }, { "name": "language_model.model.layers.50.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24217600 }, { "name": "language_model.model.layers.50.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24228352 }, { "name": "language_model.model.layers.50.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24239104 }, { "name": "language_model.model.layers.50.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24239360 }, { "name": "language_model.model.layers.51.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24239616 }, { "name": "language_model.model.layers.51.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24250368 }, { "name": "language_model.model.layers.51.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24261120 }, { "name": "language_model.model.layers.51.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24271872 }, { "name": "language_model.model.layers.51.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24282624 }, { "name": "language_model.model.layers.51.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24282880 }, { "name": "language_model.model.layers.52.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24283136 }, { "name": "language_model.model.layers.52.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24293888 }, { "name": "language_model.model.layers.52.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24304640 }, { "name": "language_model.model.layers.52.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24315392 }, { "name": "language_model.model.layers.52.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24326144 }, { "name": "language_model.model.layers.52.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24326400 }, { "name": "language_model.model.layers.53.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24326656 }, { "name": "language_model.model.layers.53.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24337408 }, { "name": "language_model.model.layers.53.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24348160 }, { "name": "language_model.model.layers.53.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24358912 }, { "name": "language_model.model.layers.53.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24369664 }, { "name": "language_model.model.layers.53.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24369920 }, { "name": "language_model.model.layers.54.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24370176 }, { "name": "language_model.model.layers.54.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24380928 }, { "name": "language_model.model.layers.54.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24391680 }, { "name": "language_model.model.layers.54.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24402432 }, { "name": "language_model.model.layers.54.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24413184 }, { "name": "language_model.model.layers.54.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24413440 }, { "name": "language_model.model.layers.55.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24413696 }, { "name": "language_model.model.layers.55.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24413952 }, { "name": "language_model.model.layers.55.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24414208 }, { "name": "language_model.model.layers.55.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24424960 }, { "name": "language_model.model.layers.55.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24435712 }, { "name": "language_model.model.layers.55.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24446464 }, { "name": "language_model.model.layers.56.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24457216 }, { "name": "language_model.model.layers.56.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24467968 }, { "name": "language_model.model.layers.56.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24478720 }, { "name": "language_model.model.layers.56.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24489472 }, { "name": "language_model.model.layers.56.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24500224 }, { "name": "language_model.model.layers.56.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24500480 }, { "name": "language_model.model.layers.57.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24500736 }, { "name": "language_model.model.layers.57.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24511488 }, { "name": "language_model.model.layers.57.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24522240 }, { "name": "language_model.model.layers.57.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24532992 }, { "name": "language_model.model.layers.57.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24543744 }, { "name": "language_model.model.layers.57.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24544000 }, { "name": "language_model.model.layers.58.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24544256 }, { "name": "language_model.model.layers.58.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24555008 }, { "name": "language_model.model.layers.58.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24565760 }, { "name": "language_model.model.layers.58.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24576512 }, { "name": "language_model.model.layers.58.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24587264 }, { "name": "language_model.model.layers.58.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24587520 }, { "name": "language_model.model.layers.59.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24587776 }, { "name": "language_model.model.layers.59.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24598528 }, { "name": "language_model.model.layers.59.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24609280 }, { "name": "language_model.model.layers.59.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24620032 }, { "name": "language_model.model.layers.59.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24630784 }, { "name": "language_model.model.layers.59.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24631040 }, { "name": "language_model.model.layers.60.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24631296 }, { "name": "language_model.model.layers.60.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24642048 }, { "name": "language_model.model.layers.60.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24652800 }, { "name": "language_model.model.layers.60.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24663552 }, { "name": "language_model.model.layers.60.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24674304 }, { "name": "language_model.model.layers.60.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24674560 }, { "name": "language_model.model.layers.61.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24674816 }, { "name": "language_model.model.layers.61.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24675072 }, { "name": "language_model.model.layers.61.input_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24675328 }, { "name": "language_model.model.layers.61.post_attention_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24686080 }, { "name": "language_model.model.layers.61.post_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24696832 }, { "name": "language_model.model.layers.61.pre_feedforward_layernorm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24707584 }, { "name": "language_model.model.norm.weight", "shape": [ 5376 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10752, "byteOffset": 24718336 } ], "md5sum": "b55393954468d05176fc671a577cbf72" } ] }