phi-1_5-q0f32-MLC / ndarray-cache-b16.json
CharlieFRuan's picture
Initial commit
00342f8 verified
raw
history blame
105 kB
{
"metadata": {
"ParamSize": 245,
"ParamBytes": 5673082880.0,
"BitsPerParam": 32.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 209715200,
"records": [
{
"name": "transformer.embd.weight",
"shape": [
51200,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 209715200,
"byteOffset": 0
}
],
"md5sum": "a8605021ea053754a003da8f06fbc97d"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 25186304,
"records": [
{
"name": "transformer.h.0.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "transformer.h.0.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 4096
},
{
"name": "transformer.h.0.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 8192
},
{
"name": "transformer.h.0.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 25174016
}
],
"md5sum": "67ccf81e4bd8f1e274cd98665830a10c"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.0.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e076ced0c11b935afabbba1275aa7aee"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.0.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "198a6696ab66cfc44e6de5ca933714a0"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.1.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a42e1763dc3b7270c58676580dd1b6ef"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.1.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2a166ab4945052ec0e29831b6a359eda"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.1.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "17a86d7400a79bc95cf1e9ffe6d4d731"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.2.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6f63c27ca3c8968b879f35738d05b4b6"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.2.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "42c6a506ee2b498aa4785c39c083ecb5"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.2.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "405d421b4f39a5e1d9b0cd86d00d004c"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.3.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "13e8da8950bd42c86f47e5661c533733"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25300992,
"records": [
{
"name": "transformer.h.0.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.0.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "transformer.h.0.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "transformer.h.0.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "transformer.h.1.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "transformer.h.1.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "transformer.h.1.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 8421376
},
{
"name": "transformer.h.1.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 8433664
},
{
"name": "transformer.h.1.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16822272
},
{
"name": "transformer.h.1.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16826368
},
{
"name": "transformer.h.1.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16842752
},
{
"name": "transformer.h.2.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16846848
},
{
"name": "transformer.h.2.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "transformer.h.2.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 16855040
},
{
"name": "transformer.h.2.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 16867328
},
{
"name": "transformer.h.2.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25255936
},
{
"name": "transformer.h.2.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25260032
},
{
"name": "transformer.h.2.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25276416
},
{
"name": "transformer.h.3.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25280512
},
{
"name": "transformer.h.3.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25284608
},
{
"name": "transformer.h.3.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 25288704
}
],
"md5sum": "441d71a59fbda076c591f71a0e1c0341"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.3.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d1bc93af8197956ea694b53bbbc89327"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.3.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "925af2d99c71e060c03fd75185b1f497"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.4.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bef40ffab99d9a572e2eacf9587b7cde"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.4.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "02e3a1ccebee6388c275db73dbf80ccd"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.4.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b5d7ca5772e5a0cdf368c2e97fdaf1da"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.5.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "016bacfca2f07ba2ce7eae7994892885"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.5.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5f2dde01733bdfc9c4dee4a748f5a0d5"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.5.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "be3112ade0c6036995022be14d66df62"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.6.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f2d5888e1d00543276a2111e4070ad9d"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25300992,
"records": [
{
"name": "transformer.h.3.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.3.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "transformer.h.3.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "transformer.h.3.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "transformer.h.4.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "transformer.h.4.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "transformer.h.4.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 8421376
},
{
"name": "transformer.h.4.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 8433664
},
{
"name": "transformer.h.4.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16822272
},
{
"name": "transformer.h.4.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16826368
},
{
"name": "transformer.h.4.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16842752
},
{
"name": "transformer.h.5.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16846848
},
{
"name": "transformer.h.5.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "transformer.h.5.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 16855040
},
{
"name": "transformer.h.5.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 16867328
},
{
"name": "transformer.h.5.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25255936
},
{
"name": "transformer.h.5.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25260032
},
{
"name": "transformer.h.5.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25276416
},
{
"name": "transformer.h.6.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25280512
},
{
"name": "transformer.h.6.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25284608
},
{
"name": "transformer.h.6.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 25288704
}
],
"md5sum": "f54084a552b984c57670e88319c99771"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.6.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "bd13cb4ffcdafbc8a8c3ec1bc26ffbaf"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.6.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "afd5111978ff8a2ead2c1c5e859431f4"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.7.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c6033c3bf1cc8161c0a24e3b7565e1ef"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.7.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "78bfee79747e8f2880fefaca8e28e590"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.7.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7a0db5ea5b8d44adec0d55b846352a55"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.8.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1bf2abff697e6cee5082d2698d63b91b"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.8.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "87f8eded440503b68b96ce961c60b8d3"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.8.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "135bc520b75aa6bf960914a4065ee855"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.9.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5e0a304a5407da6b6573bfcf6aff681c"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25300992,
"records": [
{
"name": "transformer.h.6.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.6.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "transformer.h.6.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "transformer.h.6.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "transformer.h.7.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "transformer.h.7.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "transformer.h.7.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 8421376
},
{
"name": "transformer.h.7.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 8433664
},
{
"name": "transformer.h.7.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16822272
},
{
"name": "transformer.h.7.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16826368
},
{
"name": "transformer.h.7.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16842752
},
{
"name": "transformer.h.8.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16846848
},
{
"name": "transformer.h.8.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "transformer.h.8.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 16855040
},
{
"name": "transformer.h.8.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 16867328
},
{
"name": "transformer.h.8.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25255936
},
{
"name": "transformer.h.8.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25260032
},
{
"name": "transformer.h.8.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25276416
},
{
"name": "transformer.h.9.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25280512
},
{
"name": "transformer.h.9.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25284608
},
{
"name": "transformer.h.9.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 25288704
}
],
"md5sum": "12e43363710fe56560b9be1e4dd6a4f1"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.9.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2f9f098ffe6ca3e9d853a1ae53ee4151"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.9.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e7f8c47a549d5e01028890dcc9eb8ba9"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.10.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d50102e665e1d8c4b0094615b04b45ac"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.10.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6a9e62f01eeae00ed53a93107dd8a9c5"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.10.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2033ed998e461a7ec8fa4e5df62a6c02"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.11.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "440f10657fee337289c0343622595462"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.11.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2df3ebab92cc0570ee88d456dc5b4cb7"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.11.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "250a12796af0e76d01461b073fd3afd2"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.12.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "10bc62ad9d221af31a6ddf798ba44ef4"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 25300992,
"records": [
{
"name": "transformer.h.9.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.9.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "transformer.h.9.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "transformer.h.9.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "transformer.h.10.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "transformer.h.10.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "transformer.h.10.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 8421376
},
{
"name": "transformer.h.10.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 8433664
},
{
"name": "transformer.h.10.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16822272
},
{
"name": "transformer.h.10.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16826368
},
{
"name": "transformer.h.10.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16842752
},
{
"name": "transformer.h.11.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16846848
},
{
"name": "transformer.h.11.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "transformer.h.11.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 16855040
},
{
"name": "transformer.h.11.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 16867328
},
{
"name": "transformer.h.11.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25255936
},
{
"name": "transformer.h.11.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25260032
},
{
"name": "transformer.h.11.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25276416
},
{
"name": "transformer.h.12.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25280512
},
{
"name": "transformer.h.12.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25284608
},
{
"name": "transformer.h.12.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 25288704
}
],
"md5sum": "82bb22384077785e0eb8e0a2309454a2"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.12.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cb7e65ae8e7fd6a12eac45eae124ea56"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.12.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "07de806a9ab6d49f16208a649dac1df6"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.13.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3115476f8181c9d5cd4277a1a7118cd2"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.13.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e0cfc90e608da3cb5f6b167c7a65fc01"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.13.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "743333866391d30086fc0412c7974e48"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.14.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6dd8f20986ccc6c22d5d2de9b3af2171"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.14.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "80f482a951f4542f92154d58cd6ee250"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.14.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "546c5882774c0f0d72aa31274e68a678"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.15.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a3519b85eff52ce45b8bc1c89277a795"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25300992,
"records": [
{
"name": "transformer.h.12.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.12.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "transformer.h.12.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "transformer.h.12.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "transformer.h.13.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "transformer.h.13.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "transformer.h.13.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 8421376
},
{
"name": "transformer.h.13.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 8433664
},
{
"name": "transformer.h.13.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16822272
},
{
"name": "transformer.h.13.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16826368
},
{
"name": "transformer.h.13.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16842752
},
{
"name": "transformer.h.14.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16846848
},
{
"name": "transformer.h.14.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "transformer.h.14.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 16855040
},
{
"name": "transformer.h.14.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 16867328
},
{
"name": "transformer.h.14.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25255936
},
{
"name": "transformer.h.14.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25260032
},
{
"name": "transformer.h.14.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25276416
},
{
"name": "transformer.h.15.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25280512
},
{
"name": "transformer.h.15.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25284608
},
{
"name": "transformer.h.15.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 25288704
}
],
"md5sum": "2b0d035e10be953ad8acb4477ac954b8"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.15.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "36124fd7674b8054eac914c55267cea2"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.15.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "eb9a9f7aa1a2da8c6a4a8ff003dd435b"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.16.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7f9c42499cb13bbd3a310ea96e9064b3"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.16.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a0b2478ffd2fbe50d8d67271a1935e63"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.16.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6da03f35fe9e01208df697422aca69fd"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.17.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "22df39c78cbf0398b68f0ded35cc032a"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.17.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "68b2acafe9398e0336e8ddd4ea783809"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.17.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d9240b93257a5354fc2a02f3e98bd6c3"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.18.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d541e5db82d9f19e5734e14afe10331d"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 25300992,
"records": [
{
"name": "transformer.h.15.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.15.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "transformer.h.15.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "transformer.h.15.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "transformer.h.16.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "transformer.h.16.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "transformer.h.16.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 8421376
},
{
"name": "transformer.h.16.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 8433664
},
{
"name": "transformer.h.16.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16822272
},
{
"name": "transformer.h.16.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16826368
},
{
"name": "transformer.h.16.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16842752
},
{
"name": "transformer.h.17.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16846848
},
{
"name": "transformer.h.17.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "transformer.h.17.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 16855040
},
{
"name": "transformer.h.17.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 16867328
},
{
"name": "transformer.h.17.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25255936
},
{
"name": "transformer.h.17.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25260032
},
{
"name": "transformer.h.17.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25276416
},
{
"name": "transformer.h.18.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25280512
},
{
"name": "transformer.h.18.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25284608
},
{
"name": "transformer.h.18.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 25288704
}
],
"md5sum": "04f67f56ff47ef38c4980a7941ebd71c"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.18.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "edeed64f99b4db0b91c2ca73906eeefa"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.18.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "06ea8110c08fc61ac217f0ca00af3f11"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.19.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "993ce6b239a07be350df994ad0f5f55a"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.19.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "56d8bd166f3938d314fb357ba5b6af29"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.19.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a1953efdb537fed81ad3ee64813c8af6"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.20.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "91627af652fdfd177f3ffc862f9b85fd"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.20.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6e3ced2a50b0cc260d02e86a4aae234d"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.20.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f47401d11940ed82b90157a561fc3725"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.21.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "263ad0599fdc7e6a1207447cf819f232"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 25300992,
"records": [
{
"name": "transformer.h.18.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.18.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "transformer.h.18.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "transformer.h.18.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "transformer.h.19.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "transformer.h.19.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "transformer.h.19.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 8421376
},
{
"name": "transformer.h.19.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 8433664
},
{
"name": "transformer.h.19.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16822272
},
{
"name": "transformer.h.19.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16826368
},
{
"name": "transformer.h.19.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16842752
},
{
"name": "transformer.h.20.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16846848
},
{
"name": "transformer.h.20.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "transformer.h.20.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 16855040
},
{
"name": "transformer.h.20.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 16867328
},
{
"name": "transformer.h.20.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25255936
},
{
"name": "transformer.h.20.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25260032
},
{
"name": "transformer.h.20.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25276416
},
{
"name": "transformer.h.21.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25280512
},
{
"name": "transformer.h.21.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25284608
},
{
"name": "transformer.h.21.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 25288704
}
],
"md5sum": "97afffba16275ee5e5d147fa1c788d18"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.21.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2a430f95773ba9d74b2bf46981956085"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.21.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8ae40c479c8f397c8e71b1886c095bf5"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.22.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "68a29572f1f4b30a706781a1de068c25"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.22.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1e7e0e836e24c1b866241b95cfa51fc8"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.22.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a3c5ae69e214eb5968920a699625b8a0"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.23.mixer.Wqkv.weight",
"shape": [
6144,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "969f0f260ff937665b48356e86d551b8"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.23.mlp.fc1.weight",
"shape": [
8192,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "58deb32cec3dc98c9d3dbdc08819de78"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "transformer.h.23.mlp.fc2.weight",
"shape": [
2048,
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4facc13015040b6e09345b47bdcd8d80"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 209715200,
"records": [
{
"name": "lm_head.linear.weight",
"shape": [
51200,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 209715200,
"byteOffset": 0
}
],
"md5sum": "f3be3e3e8addb4c825a8151cf42860a3"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 25391104,
"records": [
{
"name": "transformer.h.21.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "transformer.h.21.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "transformer.h.21.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 8392704
},
{
"name": "transformer.h.21.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8409088
},
{
"name": "transformer.h.22.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8413184
},
{
"name": "transformer.h.22.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 8417280
},
{
"name": "transformer.h.22.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 8421376
},
{
"name": "transformer.h.22.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 8433664
},
{
"name": "transformer.h.22.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16822272
},
{
"name": "transformer.h.22.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16826368
},
{
"name": "transformer.h.22.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16842752
},
{
"name": "transformer.h.23.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16846848
},
{
"name": "transformer.h.23.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 16850944
},
{
"name": "transformer.h.23.mixer.Wqkv.bias",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 16855040
},
{
"name": "transformer.h.23.mixer.out_proj.weight",
"shape": [
2048,
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8388608,
"byteOffset": 16867328
},
{
"name": "transformer.h.23.mixer.out_proj.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25255936
},
{
"name": "transformer.h.23.mlp.fc1.bias",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 25260032
},
{
"name": "transformer.h.23.mlp.fc2.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25276416
},
{
"name": "lm_head.ln.weight",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25280512
},
{
"name": "lm_head.ln.bias",
"shape": [
2048
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4096,
"byteOffset": 25284608
},
{
"name": "lm_head.linear.bias",
"shape": [
51200
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 102400,
"byteOffset": 25288704
}
],
"md5sum": "119a470d69bbc09cc49391e3cfcf5c9f"
}
]
}