diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3213 @@ +{ + "metadata": { + "ParamSize": 245, + "ParamBytes": 5673082880.0, + "BitsPerParam": 32.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 209715200, + "records": [ + { + "name": "transformer.embd.weight", + "shape": [ + 51200, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 209715200, + "byteOffset": 0 + } + ], + "md5sum": "a8605021ea053754a003da8f06fbc97d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 25186304, + "records": [ + { + "name": "transformer.h.0.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 4096 + }, + { + "name": "transformer.h.0.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 8192 + }, + { + "name": "transformer.h.0.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 25174016 + } + ], + "md5sum": "67ccf81e4bd8f1e274cd98665830a10c" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.0.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e076ced0c11b935afabbba1275aa7aee" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.0.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "198a6696ab66cfc44e6de5ca933714a0" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.1.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a42e1763dc3b7270c58676580dd1b6ef" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.1.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2a166ab4945052ec0e29831b6a359eda" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.1.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "17a86d7400a79bc95cf1e9ffe6d4d731" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.2.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6f63c27ca3c8968b879f35738d05b4b6" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.2.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "42c6a506ee2b498aa4785c39c083ecb5" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.2.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "405d421b4f39a5e1d9b0cd86d00d004c" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.3.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "13e8da8950bd42c86f47e5661c533733" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25300992, + "records": [ + { + "name": "transformer.h.0.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.0.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 8392704 + }, + { + "name": "transformer.h.0.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8409088 + }, + { + "name": "transformer.h.1.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8413184 + }, + { + "name": "transformer.h.1.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8417280 + }, + { + "name": "transformer.h.1.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 8421376 + }, + { + "name": "transformer.h.1.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8433664 + }, + { + "name": "transformer.h.1.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16822272 + }, + { + "name": "transformer.h.1.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16826368 + }, + { + "name": "transformer.h.1.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16842752 + }, + { + "name": "transformer.h.2.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16846848 + }, + { + "name": "transformer.h.2.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16850944 + }, + { + "name": "transformer.h.2.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 16855040 + }, + { + "name": "transformer.h.2.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16867328 + }, + { + "name": "transformer.h.2.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25255936 + }, + { + "name": "transformer.h.2.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 25260032 + }, + { + "name": "transformer.h.2.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25276416 + }, + { + "name": "transformer.h.3.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25280512 + }, + { + "name": "transformer.h.3.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25284608 + }, + { + "name": "transformer.h.3.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 25288704 + } + ], + "md5sum": "441d71a59fbda076c591f71a0e1c0341" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.3.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d1bc93af8197956ea694b53bbbc89327" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.3.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "925af2d99c71e060c03fd75185b1f497" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.4.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bef40ffab99d9a572e2eacf9587b7cde" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.4.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "02e3a1ccebee6388c275db73dbf80ccd" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.4.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b5d7ca5772e5a0cdf368c2e97fdaf1da" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.5.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "016bacfca2f07ba2ce7eae7994892885" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.5.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5f2dde01733bdfc9c4dee4a748f5a0d5" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.5.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "be3112ade0c6036995022be14d66df62" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.6.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f2d5888e1d00543276a2111e4070ad9d" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25300992, + "records": [ + { + "name": "transformer.h.3.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.3.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 8392704 + }, + { + "name": "transformer.h.3.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8409088 + }, + { + "name": "transformer.h.4.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8413184 + }, + { + "name": "transformer.h.4.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8417280 + }, + { + "name": "transformer.h.4.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 8421376 + }, + { + "name": "transformer.h.4.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8433664 + }, + { + "name": "transformer.h.4.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16822272 + }, + { + "name": "transformer.h.4.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16826368 + }, + { + "name": "transformer.h.4.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16842752 + }, + { + "name": "transformer.h.5.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16846848 + }, + { + "name": "transformer.h.5.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16850944 + }, + { + "name": "transformer.h.5.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 16855040 + }, + { + "name": "transformer.h.5.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16867328 + }, + { + "name": "transformer.h.5.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25255936 + }, + { + "name": "transformer.h.5.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 25260032 + }, + { + "name": "transformer.h.5.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25276416 + }, + { + "name": "transformer.h.6.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25280512 + }, + { + "name": "transformer.h.6.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25284608 + }, + { + "name": "transformer.h.6.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 25288704 + } + ], + "md5sum": "f54084a552b984c57670e88319c99771" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.6.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bd13cb4ffcdafbc8a8c3ec1bc26ffbaf" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.6.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "afd5111978ff8a2ead2c1c5e859431f4" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.7.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c6033c3bf1cc8161c0a24e3b7565e1ef" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.7.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "78bfee79747e8f2880fefaca8e28e590" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.7.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7a0db5ea5b8d44adec0d55b846352a55" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.8.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1bf2abff697e6cee5082d2698d63b91b" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.8.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "87f8eded440503b68b96ce961c60b8d3" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.8.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "135bc520b75aa6bf960914a4065ee855" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.9.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5e0a304a5407da6b6573bfcf6aff681c" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 25300992, + "records": [ + { + "name": "transformer.h.6.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.6.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.6.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 8392704 + }, + { + "name": "transformer.h.6.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8409088 + }, + { + "name": "transformer.h.7.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8413184 + }, + { + "name": "transformer.h.7.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8417280 + }, + { + "name": "transformer.h.7.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 8421376 + }, + { + "name": "transformer.h.7.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8433664 + }, + { + "name": "transformer.h.7.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16822272 + }, + { + "name": "transformer.h.7.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16826368 + }, + { + "name": "transformer.h.7.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16842752 + }, + { + "name": "transformer.h.8.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16846848 + }, + { + "name": "transformer.h.8.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16850944 + }, + { + "name": "transformer.h.8.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 16855040 + }, + { + "name": "transformer.h.8.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16867328 + }, + { + "name": "transformer.h.8.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25255936 + }, + { + "name": "transformer.h.8.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 25260032 + }, + { + "name": "transformer.h.8.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25276416 + }, + { + "name": "transformer.h.9.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25280512 + }, + { + "name": "transformer.h.9.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25284608 + }, + { + "name": "transformer.h.9.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 25288704 + } + ], + "md5sum": "12e43363710fe56560b9be1e4dd6a4f1" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.9.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2f9f098ffe6ca3e9d853a1ae53ee4151" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.9.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e7f8c47a549d5e01028890dcc9eb8ba9" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.10.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d50102e665e1d8c4b0094615b04b45ac" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.10.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6a9e62f01eeae00ed53a93107dd8a9c5" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.10.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2033ed998e461a7ec8fa4e5df62a6c02" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.11.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "440f10657fee337289c0343622595462" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.11.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2df3ebab92cc0570ee88d456dc5b4cb7" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.11.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "250a12796af0e76d01461b073fd3afd2" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.12.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "10bc62ad9d221af31a6ddf798ba44ef4" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 25300992, + "records": [ + { + "name": "transformer.h.9.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.9.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.9.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 8392704 + }, + { + "name": "transformer.h.9.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8409088 + }, + { + "name": "transformer.h.10.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8413184 + }, + { + "name": "transformer.h.10.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8417280 + }, + { + "name": "transformer.h.10.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 8421376 + }, + { + "name": "transformer.h.10.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8433664 + }, + { + "name": "transformer.h.10.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16822272 + }, + { + "name": "transformer.h.10.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16826368 + }, + { + "name": "transformer.h.10.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16842752 + }, + { + "name": "transformer.h.11.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16846848 + }, + { + "name": "transformer.h.11.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16850944 + }, + { + "name": "transformer.h.11.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 16855040 + }, + { + "name": "transformer.h.11.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16867328 + }, + { + "name": "transformer.h.11.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25255936 + }, + { + "name": "transformer.h.11.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 25260032 + }, + { + "name": "transformer.h.11.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25276416 + }, + { + "name": "transformer.h.12.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25280512 + }, + { + "name": "transformer.h.12.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25284608 + }, + { + "name": "transformer.h.12.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 25288704 + } + ], + "md5sum": "82bb22384077785e0eb8e0a2309454a2" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.12.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "cb7e65ae8e7fd6a12eac45eae124ea56" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.12.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "07de806a9ab6d49f16208a649dac1df6" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.13.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "3115476f8181c9d5cd4277a1a7118cd2" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.13.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e0cfc90e608da3cb5f6b167c7a65fc01" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.13.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "743333866391d30086fc0412c7974e48" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.14.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6dd8f20986ccc6c22d5d2de9b3af2171" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.14.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "80f482a951f4542f92154d58cd6ee250" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.14.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "546c5882774c0f0d72aa31274e68a678" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.15.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a3519b85eff52ce45b8bc1c89277a795" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 25300992, + "records": [ + { + "name": "transformer.h.12.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.12.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 8392704 + }, + { + "name": "transformer.h.12.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8409088 + }, + { + "name": "transformer.h.13.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8413184 + }, + { + "name": "transformer.h.13.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8417280 + }, + { + "name": "transformer.h.13.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 8421376 + }, + { + "name": "transformer.h.13.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8433664 + }, + { + "name": "transformer.h.13.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16822272 + }, + { + "name": "transformer.h.13.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16826368 + }, + { + "name": "transformer.h.13.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16842752 + }, + { + "name": "transformer.h.14.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16846848 + }, + { + "name": "transformer.h.14.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16850944 + }, + { + "name": "transformer.h.14.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 16855040 + }, + { + "name": "transformer.h.14.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16867328 + }, + { + "name": "transformer.h.14.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25255936 + }, + { + "name": "transformer.h.14.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 25260032 + }, + { + "name": "transformer.h.14.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25276416 + }, + { + "name": "transformer.h.15.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25280512 + }, + { + "name": "transformer.h.15.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25284608 + }, + { + "name": "transformer.h.15.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 25288704 + } + ], + "md5sum": "2b0d035e10be953ad8acb4477ac954b8" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.15.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "36124fd7674b8054eac914c55267cea2" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.15.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "eb9a9f7aa1a2da8c6a4a8ff003dd435b" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.16.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7f9c42499cb13bbd3a310ea96e9064b3" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.16.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a0b2478ffd2fbe50d8d67271a1935e63" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.16.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6da03f35fe9e01208df697422aca69fd" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.17.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "22df39c78cbf0398b68f0ded35cc032a" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.17.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "68b2acafe9398e0336e8ddd4ea783809" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.17.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d9240b93257a5354fc2a02f3e98bd6c3" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.18.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d541e5db82d9f19e5734e14afe10331d" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 25300992, + "records": [ + { + "name": "transformer.h.15.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.15.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.15.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 8392704 + }, + { + "name": "transformer.h.15.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8409088 + }, + { + "name": "transformer.h.16.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8413184 + }, + { + "name": "transformer.h.16.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8417280 + }, + { + "name": "transformer.h.16.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 8421376 + }, + { + "name": "transformer.h.16.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8433664 + }, + { + "name": "transformer.h.16.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16822272 + }, + { + "name": "transformer.h.16.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16826368 + }, + { + "name": "transformer.h.16.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16842752 + }, + { + "name": "transformer.h.17.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16846848 + }, + { + "name": "transformer.h.17.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16850944 + }, + { + "name": "transformer.h.17.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 16855040 + }, + { + "name": "transformer.h.17.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16867328 + }, + { + "name": "transformer.h.17.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25255936 + }, + { + "name": "transformer.h.17.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 25260032 + }, + { + "name": "transformer.h.17.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25276416 + }, + { + "name": "transformer.h.18.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25280512 + }, + { + "name": "transformer.h.18.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25284608 + }, + { + "name": "transformer.h.18.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 25288704 + } + ], + "md5sum": "04f67f56ff47ef38c4980a7941ebd71c" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.18.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "edeed64f99b4db0b91c2ca73906eeefa" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.18.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "06ea8110c08fc61ac217f0ca00af3f11" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.19.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "993ce6b239a07be350df994ad0f5f55a" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.19.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "56d8bd166f3938d314fb357ba5b6af29" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.19.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a1953efdb537fed81ad3ee64813c8af6" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.20.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "91627af652fdfd177f3ffc862f9b85fd" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.20.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6e3ced2a50b0cc260d02e86a4aae234d" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.20.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f47401d11940ed82b90157a561fc3725" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.21.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "263ad0599fdc7e6a1207447cf819f232" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 25300992, + "records": [ + { + "name": "transformer.h.18.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.18.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 8392704 + }, + { + "name": "transformer.h.18.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8409088 + }, + { + "name": "transformer.h.19.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8413184 + }, + { + "name": "transformer.h.19.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8417280 + }, + { + "name": "transformer.h.19.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 8421376 + }, + { + "name": "transformer.h.19.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8433664 + }, + { + "name": "transformer.h.19.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16822272 + }, + { + "name": "transformer.h.19.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16826368 + }, + { + "name": "transformer.h.19.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16842752 + }, + { + "name": "transformer.h.20.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16846848 + }, + { + "name": "transformer.h.20.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16850944 + }, + { + "name": "transformer.h.20.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 16855040 + }, + { + "name": "transformer.h.20.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16867328 + }, + { + "name": "transformer.h.20.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25255936 + }, + { + "name": "transformer.h.20.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 25260032 + }, + { + "name": "transformer.h.20.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25276416 + }, + { + "name": "transformer.h.21.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25280512 + }, + { + "name": "transformer.h.21.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25284608 + }, + { + "name": "transformer.h.21.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 25288704 + } + ], + "md5sum": "97afffba16275ee5e5d147fa1c788d18" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.21.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2a430f95773ba9d74b2bf46981956085" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.21.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8ae40c479c8f397c8e71b1886c095bf5" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.22.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "68a29572f1f4b30a706781a1de068c25" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.22.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1e7e0e836e24c1b866241b95cfa51fc8" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.22.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a3c5ae69e214eb5968920a699625b8a0" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.h.23.mixer.Wqkv.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "969f0f260ff937665b48356e86d551b8" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.23.mlp.fc1.weight", + "shape": [ + 8192, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "58deb32cec3dc98c9d3dbdc08819de78" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "transformer.h.23.mlp.fc2.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4facc13015040b6e09345b47bdcd8d80" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 209715200, + "records": [ + { + "name": "lm_head.linear.weight", + "shape": [ + 51200, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 209715200, + "byteOffset": 0 + } + ], + "md5sum": "f3be3e3e8addb4c825a8151cf42860a3" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 25391104, + "records": [ + { + "name": "transformer.h.21.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.21.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 8392704 + }, + { + "name": "transformer.h.21.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8409088 + }, + { + "name": "transformer.h.22.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8413184 + }, + { + "name": "transformer.h.22.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8417280 + }, + { + "name": "transformer.h.22.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 8421376 + }, + { + "name": "transformer.h.22.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8433664 + }, + { + "name": "transformer.h.22.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16822272 + }, + { + "name": "transformer.h.22.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16826368 + }, + { + "name": "transformer.h.22.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16842752 + }, + { + "name": "transformer.h.23.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16846848 + }, + { + "name": "transformer.h.23.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16850944 + }, + { + "name": "transformer.h.23.mixer.Wqkv.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 16855040 + }, + { + "name": "transformer.h.23.mixer.out_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16867328 + }, + { + "name": "transformer.h.23.mixer.out_proj.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25255936 + }, + { + "name": "transformer.h.23.mlp.fc1.bias", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 25260032 + }, + { + "name": "transformer.h.23.mlp.fc2.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25276416 + }, + { + "name": "lm_head.ln.weight", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25280512 + }, + { + "name": "lm_head.ln.bias", + "shape": [ + 2048 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25284608 + }, + { + "name": "lm_head.linear.bias", + "shape": [ + 51200 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 102400, + "byteOffset": 25288704 + } + ], + "md5sum": "119a470d69bbc09cc49391e3cfcf5c9f" + } + ] +} \ No newline at end of file