{ "metadata": { "ParamSize": 245, "ParamBytes": 5673082880.0, "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 209715200, "records": [ { "name": "transformer.embd.weight", "shape": [ 51200, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 209715200, "byteOffset": 0 } ], "md5sum": "a8605021ea053754a003da8f06fbc97d" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 25186304, "records": [ { "name": "transformer.h.0.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 0 }, { "name": "transformer.h.0.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 4096 }, { "name": "transformer.h.0.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 8192 }, { "name": "transformer.h.0.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 25174016 } ], "md5sum": "67ccf81e4bd8f1e274cd98665830a10c" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.0.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e076ced0c11b935afabbba1275aa7aee" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.0.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "198a6696ab66cfc44e6de5ca933714a0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.1.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a42e1763dc3b7270c58676580dd1b6ef" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.1.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2a166ab4945052ec0e29831b6a359eda" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.1.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "17a86d7400a79bc95cf1e9ffe6d4d731" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.2.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6f63c27ca3c8968b879f35738d05b4b6" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.2.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "42c6a506ee2b498aa4785c39c083ecb5" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.2.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "405d421b4f39a5e1d9b0cd86d00d004c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.3.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "13e8da8950bd42c86f47e5661c533733" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.0.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.0.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.0.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.0.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.1.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.1.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.1.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.1.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.1.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.1.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.1.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.2.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.2.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.2.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.2.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.2.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.2.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.2.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.3.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.3.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.3.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "441d71a59fbda076c591f71a0e1c0341" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.3.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d1bc93af8197956ea694b53bbbc89327" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.3.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "925af2d99c71e060c03fd75185b1f497" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.4.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bef40ffab99d9a572e2eacf9587b7cde" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.4.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "02e3a1ccebee6388c275db73dbf80ccd" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.4.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b5d7ca5772e5a0cdf368c2e97fdaf1da" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.5.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "016bacfca2f07ba2ce7eae7994892885" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.5.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5f2dde01733bdfc9c4dee4a748f5a0d5" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.5.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "be3112ade0c6036995022be14d66df62" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.6.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f2d5888e1d00543276a2111e4070ad9d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.3.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.3.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.3.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.3.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.4.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.4.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.4.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.4.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.4.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.4.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.4.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.5.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.5.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.5.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.5.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.5.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.5.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.5.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.6.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.6.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.6.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "f54084a552b984c57670e88319c99771" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.6.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bd13cb4ffcdafbc8a8c3ec1bc26ffbaf" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.6.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "afd5111978ff8a2ead2c1c5e859431f4" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.7.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c6033c3bf1cc8161c0a24e3b7565e1ef" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.7.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "78bfee79747e8f2880fefaca8e28e590" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.7.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7a0db5ea5b8d44adec0d55b846352a55" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.8.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1bf2abff697e6cee5082d2698d63b91b" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.8.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "87f8eded440503b68b96ce961c60b8d3" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.8.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "135bc520b75aa6bf960914a4065ee855" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.9.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5e0a304a5407da6b6573bfcf6aff681c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.6.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.6.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.6.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.6.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.7.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.7.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.7.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.7.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.7.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.7.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.7.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.8.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.8.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.8.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.8.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.8.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.8.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.8.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.9.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.9.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.9.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "12e43363710fe56560b9be1e4dd6a4f1" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.9.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2f9f098ffe6ca3e9d853a1ae53ee4151" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.9.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e7f8c47a549d5e01028890dcc9eb8ba9" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.10.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d50102e665e1d8c4b0094615b04b45ac" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.10.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6a9e62f01eeae00ed53a93107dd8a9c5" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.10.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2033ed998e461a7ec8fa4e5df62a6c02" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.11.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "440f10657fee337289c0343622595462" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.11.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2df3ebab92cc0570ee88d456dc5b4cb7" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.11.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "250a12796af0e76d01461b073fd3afd2" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.12.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "10bc62ad9d221af31a6ddf798ba44ef4" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.9.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.9.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.9.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.9.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.10.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.10.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.10.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.10.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.10.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.10.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.10.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.11.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.11.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.11.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.11.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.11.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.11.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.11.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.12.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.12.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.12.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "82bb22384077785e0eb8e0a2309454a2" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.12.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cb7e65ae8e7fd6a12eac45eae124ea56" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.12.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "07de806a9ab6d49f16208a649dac1df6" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.13.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3115476f8181c9d5cd4277a1a7118cd2" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.13.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e0cfc90e608da3cb5f6b167c7a65fc01" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.13.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "743333866391d30086fc0412c7974e48" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.14.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6dd8f20986ccc6c22d5d2de9b3af2171" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.14.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "80f482a951f4542f92154d58cd6ee250" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.14.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "546c5882774c0f0d72aa31274e68a678" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.15.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a3519b85eff52ce45b8bc1c89277a795" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.12.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.12.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.12.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.12.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.13.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.13.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.13.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.13.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.13.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.13.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.13.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.14.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.14.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.14.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.14.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.14.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.14.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.14.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.15.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.15.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.15.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "2b0d035e10be953ad8acb4477ac954b8" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.15.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "36124fd7674b8054eac914c55267cea2" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.15.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "eb9a9f7aa1a2da8c6a4a8ff003dd435b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.16.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7f9c42499cb13bbd3a310ea96e9064b3" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.16.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a0b2478ffd2fbe50d8d67271a1935e63" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.16.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6da03f35fe9e01208df697422aca69fd" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.17.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "22df39c78cbf0398b68f0ded35cc032a" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.17.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "68b2acafe9398e0336e8ddd4ea783809" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.17.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d9240b93257a5354fc2a02f3e98bd6c3" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.18.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d541e5db82d9f19e5734e14afe10331d" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.15.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.15.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.15.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.15.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.16.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.16.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.16.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.16.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.16.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.16.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.16.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.17.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.17.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.17.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.17.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.17.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.17.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.17.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.18.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.18.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.18.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "04f67f56ff47ef38c4980a7941ebd71c" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.18.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "edeed64f99b4db0b91c2ca73906eeefa" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.18.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "06ea8110c08fc61ac217f0ca00af3f11" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.19.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "993ce6b239a07be350df994ad0f5f55a" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.19.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "56d8bd166f3938d314fb357ba5b6af29" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.19.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a1953efdb537fed81ad3ee64813c8af6" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.20.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "91627af652fdfd177f3ffc862f9b85fd" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.20.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6e3ced2a50b0cc260d02e86a4aae234d" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.20.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f47401d11940ed82b90157a561fc3725" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.21.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "263ad0599fdc7e6a1207447cf819f232" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25300992, "records": [ { "name": "transformer.h.18.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.18.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.18.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.18.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.19.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.19.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.19.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.19.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.19.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.19.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.19.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.20.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.20.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.20.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.20.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.20.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.20.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.20.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "transformer.h.21.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "transformer.h.21.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "transformer.h.21.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 25288704 } ], "md5sum": "97afffba16275ee5e5d147fa1c788d18" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.21.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2a430f95773ba9d74b2bf46981956085" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.21.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8ae40c479c8f397c8e71b1886c095bf5" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.22.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "68a29572f1f4b30a706781a1de068c25" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.22.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1e7e0e836e24c1b866241b95cfa51fc8" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.22.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a3c5ae69e214eb5968920a699625b8a0" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.23.mixer.Wqkv.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "969f0f260ff937665b48356e86d551b8" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.23.mlp.fc1.weight", "shape": [ 8192, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "58deb32cec3dc98c9d3dbdc08819de78" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "transformer.h.23.mlp.fc2.weight", "shape": [ 2048, 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4facc13015040b6e09345b47bdcd8d80" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 209715200, "records": [ { "name": "lm_head.linear.weight", "shape": [ 51200, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 209715200, "byteOffset": 0 } ], "md5sum": "f3be3e3e8addb4c825a8151cf42860a3" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25391104, "records": [ { "name": "transformer.h.21.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.21.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "transformer.h.21.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 8392704 }, { "name": "transformer.h.21.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8409088 }, { "name": "transformer.h.22.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8413184 }, { "name": "transformer.h.22.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8417280 }, { "name": "transformer.h.22.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 8421376 }, { "name": "transformer.h.22.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 8433664 }, { "name": "transformer.h.22.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16822272 }, { "name": "transformer.h.22.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 16826368 }, { "name": "transformer.h.22.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16842752 }, { "name": "transformer.h.23.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16846848 }, { "name": "transformer.h.23.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16850944 }, { "name": "transformer.h.23.mixer.Wqkv.bias", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 16855040 }, { "name": "transformer.h.23.mixer.out_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 16867328 }, { "name": "transformer.h.23.mixer.out_proj.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25255936 }, { "name": "transformer.h.23.mlp.fc1.bias", "shape": [ 8192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 16384, "byteOffset": 25260032 }, { "name": "transformer.h.23.mlp.fc2.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25276416 }, { "name": "lm_head.ln.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25280512 }, { "name": "lm_head.ln.bias", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25284608 }, { "name": "lm_head.linear.bias", "shape": [ 51200 ], "dtype": "bfloat16", "format": "raw", "nbytes": 102400, "byteOffset": 25288704 } ], "md5sum": "119a470d69bbc09cc49391e3cfcf5c9f" } ] }