diff --git a/mlc-chat-config.json b/mlc-chat-config.json index 594b21d93b48e4d9309f2502d8e419cfeab319fa..cac2bfae1d3147a7330c5fffa71a632efcebedd5 100644 --- a/mlc-chat-config.json +++ b/mlc-chat-config.json @@ -45,7 +45,7 @@ "presence_penalty": 0.0, "frequency_penalty": 0.0, "repetition_penalty": 1.0, - "top_p": 1.0, + "top_p": 0.95, "tokenizer_files": [ "tokenizer.model", "tokenizer.json", diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..52122c70ca54257290ccdd0f5d6657bdabac6623 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,7121 @@ +{ + "metadata": { + "ParamSize": 615, + "ParamBytes": 2183177216.0, + "BitsPerParam": 4.061649986174953 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 335626240, + "records": [ + { + "name": "language_model.model.embed_tokens.q_weight", + "shape": [ + 262208, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 335626240, + "byteOffset": 0 + } + ], + "md5sum": "e885a0b51b1e7a5feccb63c2016b00f2" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 41953280, + "records": [ + { + "name": "language_model.model.embed_tokens.q_scale", + "shape": [ + 262208, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 41953280, + "byteOffset": 0 + } + ], + "md5sum": "4af7c226d701c5d1c3b8e4da1ade95b5" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "3b65e3381ecbd769ce7e0d8c67e8db58" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 26896384, + "records": [ + { + "name": "language_model.model.layers.0.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 5120 + }, + { + "name": "language_model.model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13112320 + }, + { + "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14750720 + }, + { + "name": "language_model.model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.0.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18042880 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18043392 + }, + { + "name": "language_model.model.layers.0.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19354112 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19517952 + }, + { + "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22139392 + }, + { + "name": "language_model.model.layers.0.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22467072 + }, + { + "name": "language_model.model.layers.0.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22467584 + }, + { + "name": "language_model.model.layers.0.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25089024 + }, + { + "name": "language_model.model.layers.0.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25416704 + }, + { + "name": "language_model.model.layers.0.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26727424 + }, + { + "name": "language_model.model.layers.1.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26891264 + } + ], + "md5sum": "adb63b461b7debcc96a5f9dad2fd7974" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "0f9d69a170ee75490fe602d2bb06f252" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.1.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.1.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.1.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.1.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.1.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.1.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.1.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.1.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.10.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "356de615aa47d4bd20ffa55a6961b7fc" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "016a65c86789eb32d5e33d8ec76f6f61" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.10.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.10.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.10.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.10.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.10.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.10.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.10.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.10.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.11.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "0315d3f194085e2ebe951fdd8c0a5e05" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "f92fd8f67e57d519318dada50ffd4b2c" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.11.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.11.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.11.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.11.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.11.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.11.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.11.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.11.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.12.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "5e9bba63daebc2d4f5cdc487503ad56c" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "0f6db22dcd5f25ca06157fc430230c26" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.12.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.12.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.12.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.12.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.12.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.12.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.12.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.12.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.13.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "2bbe11795b48dd3e295e30bc1df7819c" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "23cac1a72b8ceb2099c8663a974574f9" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "abc26135d3d94f556a760ec12f9db329" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 31638016, + "records": [ + { + "name": "language_model.model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.13.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.13.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.13.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.13.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.13.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.13.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.13.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.13.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 26886144 + }, + { + "name": "language_model.model.layers.14.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 30162944 + }, + { + "name": "language_model.model.layers.14.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 30163456 + }, + { + "name": "language_model.model.layers.14.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 31474176 + } + ], + "md5sum": "e4edba29ff29f837b236a23f0f24a5f8" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "27bb1c0f05bd6f460375dd8a73b40769" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 32790016, + "records": [ + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 2621440 + }, + { + "name": "language_model.model.layers.14.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 2949120 + }, + { + "name": "language_model.model.layers.14.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 2949632 + }, + { + "name": "language_model.model.layers.14.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 5571072 + }, + { + "name": "language_model.model.layers.14.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 5898752 + }, + { + "name": "language_model.model.layers.14.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 7209472 + }, + { + "name": "language_model.model.layers.2.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 7373312 + }, + { + "name": "language_model.model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 7378432 + }, + { + "name": "language_model.model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 20485632 + }, + { + "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 22124032 + }, + { + "name": "language_model.model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25400832 + }, + { + "name": "language_model.model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25405952 + }, + { + "name": "language_model.model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 25411072 + }, + { + "name": "language_model.model.layers.2.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 25416192 + }, + { + "name": "language_model.model.layers.2.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25416704 + }, + { + "name": "language_model.model.layers.2.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26727424 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 26891264 + }, + { + "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 29512704 + }, + { + "name": "language_model.model.layers.2.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 29840384 + }, + { + "name": "language_model.model.layers.2.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 29840896 + }, + { + "name": "language_model.model.layers.2.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 32462336 + } + ], + "md5sum": "a6fbb735a7784ae6df7869d6d36e131a" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "4b48f4704e643cfc733b24775b60d840" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 28370944, + "records": [ + { + "name": "language_model.model.layers.2.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.2.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 1310720 + }, + { + "name": "language_model.model.layers.3.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 1474560 + }, + { + "name": "language_model.model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 1479680 + }, + { + "name": "language_model.model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 14586880 + }, + { + "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 16225280 + }, + { + "name": "language_model.model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 19502080 + }, + { + "name": "language_model.model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 19507200 + }, + { + "name": "language_model.model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 19512320 + }, + { + "name": "language_model.model.layers.3.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 19517440 + }, + { + "name": "language_model.model.layers.3.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 19517952 + }, + { + "name": "language_model.model.layers.3.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 20828672 + }, + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 20992512 + }, + { + "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 23613952 + }, + { + "name": "language_model.model.layers.3.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 23941632 + }, + { + "name": "language_model.model.layers.3.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23942144 + }, + { + "name": "language_model.model.layers.3.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26563584 + }, + { + "name": "language_model.model.layers.3.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 26891264 + }, + { + "name": "language_model.model.layers.3.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 28201984 + }, + { + "name": "language_model.model.layers.4.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 28365824 + } + ], + "md5sum": "6a699b2fb31bb447d78a0f872c8fdad9" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "affbbcc34479ff3297bccbb98e71b27f" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.4.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.4.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.4.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.4.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.4.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.4.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.4.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.4.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.5.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "8d2b338a4a4e1bbd0c672eb49d8c625c" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "fd2263fab76b0147acc848536cf6a4df" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.5.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.5.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.5.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.5.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.5.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.5.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.5.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.5.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.6.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "6f27ad634b79e831c1f5fccd4d0e037e" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "20ddde1b7c457fb94ba4d721c6c8f418" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.6.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.6.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.6.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.6.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.6.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.6.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.6.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.6.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.7.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "82c1342f7b69ce5a8b6fca317cd43a65" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "39d2d0bde9af54ee982876200a415f12" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.7.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.7.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.7.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.7.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.7.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.7.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.7.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.7.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.8.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "c3bba6a8e7bf2da174aa79418f6ccec1" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "0a76ab6fc69b424e20ae06f02ae4812c" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.8.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.8.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.8.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.8.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.8.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.8.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.8.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.8.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.9.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "04d09d840645ae71e036908b548f057c" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "f53c0a770001827220fed24c8a7b0792" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.9.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.9.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.9.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.9.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.9.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.9.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.9.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.9.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.14.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "31a2dc796c8193acd627d615277a3aff" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "367bb8a20b8c68e3220e941d883812de" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 32804352, + "records": [ + { + "name": "language_model.model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "language_model.model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14755840 + }, + { + "name": "language_model.model.layers.15.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14760960 + }, + { + "name": "language_model.model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 14766080 + }, + { + "name": "language_model.model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 27873280 + }, + { + "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 29511680 + }, + { + "name": "language_model.model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 32788480 + }, + { + "name": "language_model.model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 32793600 + }, + { + "name": "language_model.model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 32798720 + }, + { + "name": "language_model.model.layers.15.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 32803840 + } + ], + "md5sum": "543f10a49dc7147085f3fe2f97d71299" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "2b4ba5fc534cc7b359642607999d1dba" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 31315456, + "records": [ + { + "name": "language_model.model.layers.15.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.15.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 1310720 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 1474560 + }, + { + "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 4096000 + }, + { + "name": "language_model.model.layers.15.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.15.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 4424192 + }, + { + "name": "language_model.model.layers.15.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 7045632 + }, + { + "name": "language_model.model.layers.15.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 7373312 + }, + { + "name": "language_model.model.layers.15.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 8684032 + }, + { + "name": "language_model.model.layers.16.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8847872 + }, + { + "name": "language_model.model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 8852992 + }, + { + "name": "language_model.model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 21960192 + }, + { + "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 23598592 + }, + { + "name": "language_model.model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26875392 + }, + { + "name": "language_model.model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26880512 + }, + { + "name": "language_model.model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26885632 + }, + { + "name": "language_model.model.layers.16.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 26890752 + }, + { + "name": "language_model.model.layers.16.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 26891264 + }, + { + "name": "language_model.model.layers.16.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 28201984 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 28365824 + }, + { + "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 30987264 + }, + { + "name": "language_model.model.layers.16.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 31314944 + } + ], + "md5sum": "0dfaddad7502c504f99616a6afcf9a99" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "2711cea9ea337f87a33dd29692652aa5" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 31320064, + "records": [ + { + "name": "language_model.model.layers.16.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.16.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 2621440 + }, + { + "name": "language_model.model.layers.16.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 2949120 + }, + { + "name": "language_model.model.layers.16.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 4259840 + }, + { + "name": "language_model.model.layers.17.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 4423680 + }, + { + "name": "language_model.model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 4428800 + }, + { + "name": "language_model.model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 17536000 + }, + { + "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 19174400 + }, + { + "name": "language_model.model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 22451200 + }, + { + "name": "language_model.model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 22456320 + }, + { + "name": "language_model.model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 22461440 + }, + { + "name": "language_model.model.layers.17.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22466560 + }, + { + "name": "language_model.model.layers.17.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 22467072 + }, + { + "name": "language_model.model.layers.17.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 23777792 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 23941632 + }, + { + "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 26563072 + }, + { + "name": "language_model.model.layers.17.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 26890752 + }, + { + "name": "language_model.model.layers.17.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 26891264 + }, + { + "name": "language_model.model.layers.17.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 29512704 + }, + { + "name": "language_model.model.layers.17.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 29840384 + }, + { + "name": "language_model.model.layers.17.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 31151104 + }, + { + "name": "language_model.model.layers.18.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 31314944 + } + ], + "md5sum": "e485a0bc867fb1fec39164a4cb6971a5" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "9a1d97ba5c4ef96a2f8e545acdab19fc" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.18.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.18.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.18.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.18.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.18.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.18.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.18.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.18.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.19.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "08061281d74b546b52c9d9f1da37dfd1" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "44d23858be4e9c655eb976b0d39628a4" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.19.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.19.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.19.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.19.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.19.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.19.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.19.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.19.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.20.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "142aa4b91b56dce74bf13936de40d8d5" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "67564b4f9e85d82ec3be00b478b885d2" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.20.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.20.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.20.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.20.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.20.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.20.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.20.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.20.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.21.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "0f2072a8878ee4a7f5f2ec422ca70221" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "cf521b93424a0ceb8447fa08d0c85574" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.21.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.21.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.21.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.21.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.21.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.21.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.21.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.21.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.22.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "b5276ed5f4cef69eb30ae5bad4b4d969" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "b147e42ba4fdfc90eea19c1e97adc109" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.22.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.22.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.22.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.22.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.22.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.22.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.22.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.22.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.23.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "68e7f7621f5a1055f2aca7720e449842" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "5224895dc30d9a4b82a5db13a8445ff0" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.23.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.23.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.23.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.23.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.23.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.23.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.23.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.23.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.24.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "c0b945f19a8eb396aef18fc343d054d2" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "da47787d65c6537267ef3e2dc42b91f9" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.24.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.24.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.24.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.24.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.24.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.24.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.24.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.24.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.25.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "4e9cb271a029fff9b61fae98f520a2aa" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "42e97263fea95a2f828488f6e27bc446" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.25.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.25.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.25.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.25.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.25.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.25.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.25.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.25.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.26.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "90c46f5d922317942b6368cc057b835a" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "d05771c2ed7b4a0339a9bd085c17d5fc" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.26.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.26.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.26.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.26.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.26.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.26.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.26.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.26.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.26.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.27.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "c0d9b884ab4a1810cdf1f858b92dd01a" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "f716165dadb6779d98e95dcfd8f8f3cb" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.27.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.27.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.27.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.27.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.27.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.27.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.27.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.27.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.27.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.28.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "5ef6e1999731cdb29488bbee007bdaab" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "c900aed49248c61bc29931911294f400" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.28.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.28.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.28.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.28.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.28.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.28.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.28.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.28.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.28.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.29.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "910e45b5cf898eec81b89c62658cbfde" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "7c6e3f5903cfed3c8e108f305371a592" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.29.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.29.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.29.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.29.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.29.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.29.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.29.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.29.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.29.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.30.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "434fb2ea44d48e0dc43248d311bab784" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "a3c2298abe9df34e6cd8d925e8a85cfa" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.30.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.30.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.30.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.30.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.30.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.30.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.30.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.30.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.30.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.31.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "d658fab3530527ae62d63df34bd5663b" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "927fcf46ef99980b3f2ff8d54b5191b9" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.31.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.31.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.31.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.31.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.31.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.31.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.31.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.31.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.31.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.32.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "575d899f07b14958f43b18a97e907e97" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "c3bf5cbcfffa69a24918a76fcee1babd" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.32.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.32.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.32.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.32.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.32.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.32.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.32.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.32.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.32.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.layers.33.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "d6018704bd62345551a06aeb8c1214c5" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "language_model.model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 20480, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 26214400, + "byteOffset": 0 + } + ], + "md5sum": "2b1c0c52326f2d621ce0c0394e24778e" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 26891264, + "records": [ + { + "name": "language_model.model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 2560, + 1280 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "language_model.model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 2560, + 320 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "language_model.model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 20480, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 14745600 + }, + { + "name": "language_model.model.layers.33.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18022400 + }, + { + "name": "language_model.model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18027520 + }, + { + "name": "language_model.model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18032640 + }, + { + "name": "language_model.model.layers.33.self_attn.k_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 18037760 + }, + { + "name": "language_model.model.layers.33.self_attn.k_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 18038272 + }, + { + "name": "language_model.model.layers.33.self_attn.k_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 19348992 + }, + { + "name": "language_model.model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 19512832 + }, + { + "name": "language_model.model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 64 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 22134272 + }, + { + "name": "language_model.model.layers.33.self_attn.q_norm.weight", + "shape": [ + 256 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 512, + "byteOffset": 22461952 + }, + { + "name": "language_model.model.layers.33.self_attn.q_proj.q_weight", + "shape": [ + 2048, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 2621440, + "byteOffset": 22462464 + }, + { + "name": "language_model.model.layers.33.self_attn.q_proj.q_scale", + "shape": [ + 2048, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 327680, + "byteOffset": 25083904 + }, + { + "name": "language_model.model.layers.33.self_attn.v_proj.q_weight", + "shape": [ + 1024, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1310720, + "byteOffset": 25411584 + }, + { + "name": "language_model.model.layers.33.self_attn.v_proj.q_scale", + "shape": [ + 1024, + 80 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 163840, + "byteOffset": 26722304 + }, + { + "name": "language_model.model.norm.weight", + "shape": [ + 2560 + ], + "dtype": "bfloat16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26886144 + } + ], + "md5sum": "498c34c1a1bfbc37b715c8c8e06be694" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d2a446091ecfa74b373843df444409380702145 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bc67405b216b1f70085c114507d12c12410a32b2797abe72d05d3ffdd5ef239 +size 335626240 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..f00992a7d28dc60d6ac385046395abbb2d364dbf --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5390d399a7a89cef7fd8c83281ce429d3ec1262ab0bcb311de0520a4ee053cb2 +size 41953280 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..92e590c5952b6eb8e346c4e4d9a431e534f63dc4 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2482a8d7dd33b0a67a3574088b4dd3703c9b9ca57e7d0629ca560fb281e879e1 +size 26214400 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..0b2e5e779ed3774438fad3da6386c542081455b4 --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f2bc2ded45ec199db11055d4a6405540fca7ae88a704df9ba621e4aa280c10d +size 26891264 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..c235d141688a01144d61854e1e717a5b210f5eb7 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5885ce87e24efbb6d692ae92ec88a758857afa87c49032639ecfd4f89f6b841f +size 26214400 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..1723104bc4f420f6e266f2d73fdac7c225abdd84 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3776b0509ae86c6728c0d26fa1d5980a60d816aaa87077ae815342d575cb15e +size 26214400 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..b1ce4c94aceba29b40441421265623a5abd9d55e --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:395b10e450ce1b3d571d71a35c47dddb8c93e2379aa5cb6c4e4e493a0473f783 +size 31638016 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..d6ef748c4d98f0ebfacf45c820f51bdd21129d30 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1d715d361997852a98f26a5b3e36459627910659ceda581ba07efd4b530812f +size 26214400 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..561a67c19d08b6b5f6ac5c8066c16e170dc4ce16 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6b82bc46b1b49a783f4b4d82fa0cdf22a48f75e8b65ef4f8822548d8c89f3b9 +size 32790016 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ad8eb03d0f93c98cfdb5f13c0e409e5834f5bd9 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d9cd50beaeb9b754da29f63fb6e07551b4dc21721dd81ff3faec88f3ebd3b9 +size 26214400 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..526f98b0fa317031bae36923976ec3b19e5b34f0 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58380270ccc15f23581b92a8807ff3cb7ae718be83da71958124ffeffa106c83 +size 28370944 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..69aa50a37f5bb03a78945555d79fc22a8d44fba9 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e898ea6affeddbf0e83adf476046e0aa7bb10fb11049fca2dea979a28f74165b +size 26214400 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..a492380a075ae985d93443f203fd77b406a34b5e --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8665222d49a31f3620740e904794c50bdd13957d7ebe13cf471627baf5230c81 +size 26214400 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ff3b49295d43514f62428c5279fb412d329c628 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:356fb68b530cedc4f971fece9cbb59a0360496c200c2af754ee5594bb6103801 +size 26891264 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb1f1ee8cd95208560692c84a21173203de637b2 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bfba927fadc455b0dc3fcd2a39584337770d5ee8be55bf40aa4fc729e11b49b +size 26214400 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..43e21973013b15f46c55b0be13d6ae279d12dc18 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5d13c419b4937766e476f73023727fc7f4668459b1187d9f978d473a60e7e1 +size 26891264 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..943e5780d7f54a4106a335f152c73baf2ea94563 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dc9d3068c7f8606de931a98118a9c9a0c45020e161aef6726896d8dd6cefbb9 +size 26214400 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..7d702c7c804e82b149e078b6ea9ab9d513e731d4 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1640ff15b5ac3e2a787b15aa3aa7a9e53725f148bb895fc8e8f509741bc6b4 +size 26891264 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..f445d636ba864f03e7bff3dfafc9fa4068803ac2 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e8d2b8382f61bada7790c6b6fde7850fa298134a85ad441c740a41da1169808 +size 26214400 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..aea92a0425e1ee7a03498d9dd35e4f6f27cc45f7 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90baa07c0f2ae76a42aa9245821955d24125211809f22d948d069900900bb2c3 +size 26891264 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..e52cd485433dad014df35d28ad22479c22aa541d --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29d4cc60f6a0445f563f36336e6c7c3102371ab889e7c31253ca306475a42202 +size 26214400 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..ede9d8145795d96c9b3c78fe214b511e035f84bb --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643f427c5604b41b386e3853d07a228c1dcee7c68d0348cfe612daf8b670985d +size 26891264 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..af8dd03a4d8b01ceed42755c750f148e7189a29a --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83c2e80f2e30d0e259ac72130ae20060d4a98ff3e1d3ba60101e724c9363dd4c +size 26214400 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..a43b1dd8e9f163a0c326b962aff84695938820c7 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788a726422a434ddd300b0d271141d2ab06dee8f9e99d4a7ae2bf2b5cf07256d +size 26896384 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e0a2be4596211eb34a4a8755c1d5b61d2f31e78 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d1831d97ded934c8a656689c78a5529f746817b6358264217542bcd094de1f2 +size 26891264 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..69f2d23294349f7e71cdcf7cbee02d3ad33afc16 --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0daa6bfa4a485dd638ddd38734f630ce81488e400774db4ca48fbae1bd5bda87 +size 26214400 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d4e0744e7c4696e55a24b0b08e881f3781ca140 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c335f5c327978494f3986602db00fa4edfeadc106f906e304f68e343204daab0 +size 32804352 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..155ebb693078084d1520d551e2a3536cd9765dd8 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a5233ce107f2660474c3705f0b9919b3cd4df313782aef165bcb836a2bb3e8 +size 26214400 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..2fc4e799ce0e311da1c6f2c0a292818347488d41 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7cb9a7574b7f41ed60651452e348950fbb601ed25c47fe8318f8f003d678bc0 +size 31315456 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..b180fbc69bbedc6d78b195cebae59baf9ee15ea7 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c966332758465d7353354b081a65e2bb6cdf7c9a0508879a63783d9cde0eee50 +size 26214400 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..e540496935c820aefdc85bd8395979962a1ea9d1 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a4d0195b0f3ac22bc7862b217c828f06761c2a3ea4e708e7a8ce702d599bed +size 31320064 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fa392fa15b40950aee870740b5e62f000855b4b --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:619db6c37461fcb76f300250c8d1625c51d6f3c2d7bd3e64f5387c48d9477464 +size 26214400 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..e3772fbc9822d8515c1e1abd4e8386e1f66a1953 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca6ad3c4a727df361910fe10a74981b839b45489de789650a9f89b2c21c227ca +size 26891264 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..18428c7bcbf85f5b859aeb8f0957a62799b6bea1 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96203fe88f2f894c3c5e16ba887164174197876b563ad6c9a31790ea2c97b848 +size 26214400 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3dadde179b305d5255337ae8e470bfa2036de34 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ea881d68ea13eff779a04e441d900b5453fa825de3d367c33c0f8a85bfb441b +size 26214400 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..e986e27dd00a4ce520b4d8d0d4effea2ef1aa574 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6aedaeff2d881396b5320f8025019bf182bef64500ba63a1120efc09d3f82d +size 26891264 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..f23ab49aaff1a855839271ffcd3400a10db7f2b2 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b9c1ffcc8117889bd54c20bc1c97d973e04d2b7bee75c71d60e0bcf06c5ee76 +size 26214400 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..013ee88ef3b34b339df2e7c0286fc48c8db052ac --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4cc68ef7a1aa61a1c5bd3a054c261cfbda063f82f42e284887baf102317daa6 +size 26891264 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..8120c71c0f324ced84ffe4752c20d692096c6407 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:234f209982d2317f8f2ae74f56a907470fad1bf46ba7493324926f1f1f212a38 +size 26214400 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..7150d1860ee1a17eed87f6a0a05de4827b265595 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f81ab32082f97694a92271aa2375f810c05a85ed6d072405ad606b2e59d3acaf +size 26891264 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..042ac41222a8a2d45a36c89bfc4197b3c74b7614 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b59cd55d931204ab290482b4fc410c5cfe7e7d3fb0c5caf03241bb3fe6f40f19 +size 26214400 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..576d778599e578f8bc8c59a3eb99c0a549454942 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fed8097e616ac57e8656f2d0cf0202a93d928b4c5fbec438f4f527ba941c01b5 +size 26891264 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..aaf37105a628b979fc65d39a0cbdff9ed479e96a --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:906cab19db18bab0002d2c84eb290759eaa8d3a51cdda7300a558f29a609a26f +size 26214400 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..90db6ff86aefa57db42e5a3bb4a8bc2059661797 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fe53bd0435742da9e9426701a9920a0c10629b59c43fb39759228117eb1518e +size 26891264 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..37f734d2bb578e01928b6bdc08330d9109af1772 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7275f58b4861dcfe066d859c2228a772ccada0e8ab6c069100e00934b3d1f413 +size 26214400 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..8c4ddfa103050a48b6ed7c5fa0e04708efe68054 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5033aed31c33df420d74635fb894308db036ec45852cddb44cfd8bb36b0ded9c +size 26891264 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..da433ec6206e1f25f13f33454d0ee6c17e161660 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6155980feccefd423c992f550bb206cee5e4888bd425887b547640daedefa97e +size 26891264 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0b3980347950cbe7ec436dca265e2c63714f119 --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df003ca6e52018e7d55903e64735eec9113355f581075afacae9bd47f04aafa6 +size 26214400 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..edca4354c31ea00d096a5e8eb9ee75e1d80ab9b0 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ac8135a4e5bd94447f3078c19e21898c11f3ad2a182419f209c2e83c0cc1665 +size 26891264 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..4522a3bcdb8f5cb8c124f7135359ed668299cd57 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d38d0266172e9b7cd5775a1a0bcffddd57ca188c7dbb4afd3ef1dc59ab243b +size 26214400 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..e39e41d478226c6c15161ec1fdea8bef07bca637 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f67e73fb3e7dc14cf353804619ebf660626f54146c9a527f15011c8d921e3e3 +size 26891264 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..44af9c2a5ed83481e4aa55ae310ed3b02a18ecd4 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d74d9b3438767f43c0cb460f970d1c10e8239df2dc28a88c71d2e119189c2190 +size 26214400 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..8ec4b019828fc58b22c3453f77fce0b9310ad0c2 --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42e9448edca9995aa1f04a5a1a4298c0d84b264ff21b55d2d15d171a34dc058d +size 26891264 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..56c6af3ad7733b5d6f25693e7b182273f4745246 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a5e40a80b52d3125012dca22a8466ecc84c5df475d6d51aa283fddc2cd2d24 +size 26214400 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..098f919f34f76d436342682e4179fd4cacfca3be --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec64cea48716adf06b50492c14c2139df0ff1cbafbcb0b00e25b53c1e60439b0 +size 26891264 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..802cb7012bd32c55216801ac16f7b758f34d4a05 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5ed110de243c162917d1ddafc6a00d0e0693870426813f0a5989333872b553 +size 26214400 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..a235f469068ae974dcd277a4e16a6a6cb4259531 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3d583ab68cc47ec8bf826283cc29093b233473868a75af324db772616604c3 +size 26214400 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..b9ca841633f2a43637a4d170bc53984526f96c95 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75db6972e8f25904435d8f8dcb2604a631bd12e1523feae65cdd2aeaeabbcd08 +size 26891264 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..a96b6801be4a39c7c7ccef8ac06b911f807acf1e --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91be8152f44d5fa0ca8f4df829ca715eeaa484334e2089e03c6963f1b1a72f08 +size 26214400 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..6792e1e6c47552cc5bc625609070ed1b17661405 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c758f5bd54810044e7b1e0c4cbc404d3c49a1e0ed4c47a32a49536218bd1ec70 +size 26891264 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..0710cf5cce186ed0e4eee2be18bc17220b699749 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4296899ad77e961743e98e28e6a0465af857f6fb7db713f9b54670d10e62b31 +size 26214400 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f34d56626cd86b0f6781870c23b1ef0c31dc553 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0550b169ac8c5d7db73d043bfad020f365a8e87a76d605c2ff030894fae478f5 +size 26891264 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..9a1050b74861c3298d908152a85e122e4334afec --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb403e58513962c7b3b338452cf4b60408663ab083cafaefca806f8a3662ad9c +size 26214400 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..b90618ecbac5e5bbae85353f0a9fd4942f613896 --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b16f7c198b9e0118d1983fcba12a701247c719561c54e9c1106c8487b0a9b0 +size 26891264 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..dc5860186e59e7298047e8aeb65b34508ebd5d54 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d5efec904241785625cece4c90d0ec53d14a8fabbcece091f81a32f30d7f280 +size 26214400 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..2991782ffe687371faaaba7f7aedeeec4ef8a585 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c88b86e9dbb3dcce890e51a0c2e1b06f7bbc4d40ceeff844ea623824fec602 +size 26891264 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..2adf1f37ad35b9dfe2276914f1245f7203ffe8a2 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:626db7388f8d94582355663dd30ad2a966214a1a2fe7ee31dda2d735e3ac2c7d +size 26891264 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..3ad4a7f37c8227a2ac362910a938e610326ea688 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cbe2dd18542153bd66e29b1993bc7d8726027546d0bc545a78a373dc0b898ba +size 26214400 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..b70bec95eb0b973ffa0db78f7ce9062ecbb68382 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4afe5629e915d10d6cb486afdfcf071aca3afcc811d6ff602ba26368895ff7ed +size 26891264