diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e9285b77e31fa599c9bc90e9497f94db17ce2ef6
--- /dev/null
+++ b/config.json
@@ -0,0 +1,30 @@
+{
+ "architectures": [
+ "MixtralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 6144,
+ "initializer_range": 0.02,
+ "intermediate_size": 16384,
+ "max_position_embeddings": 65536,
+ "model_type": "mixtral",
+ "num_attention_heads": 48,
+ "num_experts_per_tok": 2,
+ "num_hidden_layers": 56,
+ "num_key_value_heads": 8,
+ "num_local_experts": 8,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 1000000,
+ "router_aux_loss_coef": 0.001,
+ "router_jitter_noise": 0.0,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.40.0.dev0",
+ "use_cache": true,
+ "vocab_size": 32000
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..05283793bb9bb2f7a016ed90b7bedee58adbea84
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,6 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.40.0.dev0"
+}
diff --git a/model-00001-of-00059.safetensors b/model-00001-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..43381716a6f74e87a2294867bf41434a98feb25d
--- /dev/null
+++ b/model-00001-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:959215668dd6848473ea880a260766bd84d49f3091a1b5f41be1b3b2f7bf35c4
+size 4998663696
diff --git a/model-00002-of-00059.safetensors b/model-00002-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1a6d5619b1e389451c49c697f13d50fa555ea421
--- /dev/null
+++ b/model-00002-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:800f904fea1a126f9a032f0f5b24cc66233eef9b66e5896d0361e8871ca64981
+size 4806799120
diff --git a/model-00003-of-00059.safetensors b/model-00003-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9805f2451a98183a30ac2d1bcaa8ee4c59328419
--- /dev/null
+++ b/model-00003-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69d479a3893bc011da056deb52e1b4f1f70b802a0538968cd63eedb55d067925
+size 4806799120
diff --git a/model-00004-of-00059.safetensors b/model-00004-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..678c3e9243d6087cd3863cb845761741f0b33642
--- /dev/null
+++ b/model-00004-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74ea035e0138a2bfbabf942fbc843f1191037ed27e0062a6c0f02ff1c6102e3b
+size 4806799120
diff --git a/model-00005-of-00059.safetensors b/model-00005-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0d1d4d51ee44199fc0fc1930317c3f98f297ec6a
--- /dev/null
+++ b/model-00005-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56002e6eded8e1a494e07f833a16512a01f99f1da734e2dc13268d134ad4a383
+size 4806799120
diff --git a/model-00006-of-00059.safetensors b/model-00006-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..463eb7b31e483833dc2890d248d1405ce69ef8b9
--- /dev/null
+++ b/model-00006-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91178c8d09cd0dae938ce81fea1507a36cf2b28a2d0f995f765c246fca90825b
+size 4806799120
diff --git a/model-00007-of-00059.safetensors b/model-00007-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d87763e0f2c4a684227deff4c5e7c9b0c3eedfef
--- /dev/null
+++ b/model-00007-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd272e4a82b0de29a563069b470912d860f5fdceed800889948155b8dada2c2f
+size 4806799120
diff --git a/model-00008-of-00059.safetensors b/model-00008-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f707388e995796ba6ad98d42436d2438dc4cefaf
--- /dev/null
+++ b/model-00008-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9248cd047634f2c5c540556d0d4b396a3e3f11988dac72c8e074ade51cbe22a9
+size 4806799120
diff --git a/model-00009-of-00059.safetensors b/model-00009-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..296a79845eb4e8dd84aacacddb040d1d89173f5c
--- /dev/null
+++ b/model-00009-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19d8ac7544629355e147b17469214f18b79c3c43c70fb48563cc200aedd1e8cb
+size 4806799120
diff --git a/model-00010-of-00059.safetensors b/model-00010-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9a837b8b766bbe68184673830dae9c103ad6caa0
--- /dev/null
+++ b/model-00010-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbc3658591e93d488518d8cc46c2ce09ed19e28950dc9a8918438a386a36f08a
+size 4806799120
diff --git a/model-00011-of-00059.safetensors b/model-00011-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f9517e84c1159dce55856f6203777f002059e921
--- /dev/null
+++ b/model-00011-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d12b9677f2ef087ad3c52c3a97c8ddb87673e9614486ac4f2077846436a40dc
+size 4806799136
diff --git a/model-00012-of-00059.safetensors b/model-00012-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a18e3cd116199a2c967cadc8f421bf26fbefcd20
--- /dev/null
+++ b/model-00012-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:504cbcc3c1b6918fe52713d764ec2e9cb70bc12436bd89466b987b0d05ddf1bd
+size 4806799152
diff --git a/model-00013-of-00059.safetensors b/model-00013-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0052ecffa7b36bd42225178b722ee85e1b043236
--- /dev/null
+++ b/model-00013-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c4e20accd34d8f8c51e6c8ae6fbc23cebbc7519c7c6242abe82be69d0b324ea
+size 4806799152
diff --git a/model-00014-of-00059.safetensors b/model-00014-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6e15f99db9e7297b676fe01a0043fa018d328943
--- /dev/null
+++ b/model-00014-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a06577994c4117e5398be33e53b0b8f25949bb5046432fbc30442fea9fded54
+size 4806799152
diff --git a/model-00015-of-00059.safetensors b/model-00015-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d80daca75583964da8b55726236be92f2631eb68
--- /dev/null
+++ b/model-00015-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c590ff45831d065390b8e5ba86cc9aec9bb6906677a79a3cf2fbde3760ab4c74
+size 4806799152
diff --git a/model-00016-of-00059.safetensors b/model-00016-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..02761de79f8df7c6478246b5e7fce010ea5eb03c
--- /dev/null
+++ b/model-00016-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66f3ca30927595ba1a53439a3258660548e7e01b60e2e6455439991344675059
+size 4806799152
diff --git a/model-00017-of-00059.safetensors b/model-00017-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..865de9fc6698ff1cd7f26a10e26ff444a0463f0a
--- /dev/null
+++ b/model-00017-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb0a3a4d51305c48f3fcef1055be70ea752b01468c3f9da8ad71c3cba452c2b5
+size 4806799152
diff --git a/model-00018-of-00059.safetensors b/model-00018-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8b1b78638b8290895b387f064745ff0ec9c6bc2f
--- /dev/null
+++ b/model-00018-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5fe7294be953c69007cb8c482c68a3e69e253dda478dbf04df7bd073a83fa43
+size 4806799152
diff --git a/model-00019-of-00059.safetensors b/model-00019-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2cb71ddf51ec5f837983e4db48db186cfc2771b7
--- /dev/null
+++ b/model-00019-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:664059b8a83121d16b19a9231fd81adbe5ef2f48bb5c6eaf8d4a40934269b524
+size 4806799152
diff --git a/model-00020-of-00059.safetensors b/model-00020-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7e0fec8a63e11850f157e1a87baff733ce897d91
--- /dev/null
+++ b/model-00020-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0821243522d3542a127f2cfe0ed17b778f4f17dd50202be53ef40d14fcdba648
+size 4806799152
diff --git a/model-00021-of-00059.safetensors b/model-00021-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7784e1ddb63d7e95d1cb455c8fe2b8be3a86e27d
--- /dev/null
+++ b/model-00021-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68f1e1e1895674a6810adb48931d3a9711e10fac510859851e7bda8ee9a18000
+size 4806799152
diff --git a/model-00022-of-00059.safetensors b/model-00022-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8ff8def20d9f2c6513caaa0ed7f25edb9fe9bdd4
--- /dev/null
+++ b/model-00022-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77d1c0bc279b7f4bf89c5a2929bc2f194dfc90a7ca962ec4bfb4e18085482eaf
+size 4806799152
diff --git a/model-00023-of-00059.safetensors b/model-00023-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..af6844d0f27fe6f7244d2842d634427adc7010bf
--- /dev/null
+++ b/model-00023-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:120b4205c1823e72d11f7772cdc6a37414f447da13f641c91b3d40d17e5eb4eb
+size 4806799152
diff --git a/model-00024-of-00059.safetensors b/model-00024-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..492bcbd23433d24a40b7c0b0c7dfb109dd2e95ec
--- /dev/null
+++ b/model-00024-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16c61d525809d1320387c5592ab98f05cc2ab3d1187d66e546cb9e1188b812aa
+size 4932529864
diff --git a/model-00025-of-00059.safetensors b/model-00025-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..599bf91fa5d604da06a215422e42a99793ca3089
--- /dev/null
+++ b/model-00025-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f761c6570988d5b6bdf7f511fbc9c994ef9d3faa6408fd6f562c1a729429dba
+size 4995542848
diff --git a/model-00026-of-00059.safetensors b/model-00026-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8b70a0975b828abe336cd55933aec7fcdc82830c
--- /dev/null
+++ b/model-00026-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:879fe13767d964005b9fa8bfa1b5ba0899ec6529dd44a4e41c8a47dfbdb47967
+size 4995542848
diff --git a/model-00027-of-00059.safetensors b/model-00027-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3cec80d017f4ae9f3775c954f0536068a6240eb9
--- /dev/null
+++ b/model-00027-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee959271ba7d2323ae1081f3e51440cd48fbffd6473c643ff27c54a1ac6c02d4
+size 4932628288
diff --git a/model-00028-of-00059.safetensors b/model-00028-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..98e35fa7d3e5b17430794c3fb1f52d6da16c39e3
--- /dev/null
+++ b/model-00028-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa0860fea0341ee0044a1b23fc4872c16b0d7cbc3b086a3796b26c27160007d
+size 4806774344
diff --git a/model-00029-of-00059.safetensors b/model-00029-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a4bba95bb64fcfd63dc9b0afda762f496709a5fe
--- /dev/null
+++ b/model-00029-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cef3e592eda756117188740eae81d27028dccee47d0777b0ea7911d68da4a971
+size 4806799144
diff --git a/model-00030-of-00059.safetensors b/model-00030-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b783619059e4b7a3897650d014eb0eafb5206692
--- /dev/null
+++ b/model-00030-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ac4814702cf7083f1082dec351cea3137a136f8d6f2844f2364f2db576c2667
+size 4806799144
diff --git a/model-00031-of-00059.safetensors b/model-00031-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..871cb419855eec6135412311594ea569c84c4bc5
--- /dev/null
+++ b/model-00031-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66eacaddda00ed6286e91930ab2a07f4a90997335dd4b8ce618dd5354b082f74
+size 4806799144
diff --git a/model-00032-of-00059.safetensors b/model-00032-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..889e3fa9f82f8482bc98262cb0c5413f6a3549ce
--- /dev/null
+++ b/model-00032-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d55dbbf151e5a8da0c178fbafcd204a758b56eb62428433a6d1de0bcfa306fbe
+size 4806799144
diff --git a/model-00033-of-00059.safetensors b/model-00033-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5fa86bf2b94d472789528cc848e78db7b15bb844
--- /dev/null
+++ b/model-00033-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9861a4561cb309a6c41559b966d57581ce4502c1c69dadf8767419c10d311c16
+size 4806799152
diff --git a/model-00034-of-00059.safetensors b/model-00034-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..37d2b57eae0c84dc5a59f9831482c42fbb7076c8
--- /dev/null
+++ b/model-00034-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0db8de1ab89311fad868185498e7733e873e9cdcc7f7204e8dc2af38d5f8256b
+size 4806799152
diff --git a/model-00035-of-00059.safetensors b/model-00035-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7902e5907b7a7a97e64ac0a2c2715fa8a1a05ffb
--- /dev/null
+++ b/model-00035-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7f097c59c32acfa28f31cfa9fd357563b35c48d3c9a2773948e374beef2b62f
+size 4806799152
diff --git a/model-00036-of-00059.safetensors b/model-00036-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5213bf73930749da183d3e1e01e52100658576f7
--- /dev/null
+++ b/model-00036-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b14337938ed54763155528e16bb66a4436967e68a630683db1a890264fd4e5a
+size 4806799152
diff --git a/model-00037-of-00059.safetensors b/model-00037-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..07a372932bf37f15e5c2f63ec124bc40c3009c4e
--- /dev/null
+++ b/model-00037-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56adc089068f1b680d18ab038b2a92db63bb2d6155688e670ad46de66441b65c
+size 4806799152
diff --git a/model-00038-of-00059.safetensors b/model-00038-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..340d43cd444429d635349b4f223ac859526d63f2
--- /dev/null
+++ b/model-00038-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:545a85e71d61ddf175f50324ff655485a59dd640dafaff5a9b7b77d7bf812113
+size 4806799152
diff --git a/model-00039-of-00059.safetensors b/model-00039-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e03715c2a4564486afe92cca7378f918746969ae
--- /dev/null
+++ b/model-00039-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:484e246d71f782b9f2f71f0a9d2ae18d6780f74a8fc657dd28c65bba614987fa
+size 4806799152
diff --git a/model-00040-of-00059.safetensors b/model-00040-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b594703374d2b12c7f4178f19fbae022d6b0ba2f
--- /dev/null
+++ b/model-00040-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ed8c78a8e1f2cc786ceb6e06b2a362cbea71a738b4e2211a4ce6af5b9afec74
+size 4806799152
diff --git a/model-00041-of-00059.safetensors b/model-00041-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b376bdd9ee898189746b39a2914631102cef5de
--- /dev/null
+++ b/model-00041-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:567e34b5f751e9c6d0d6c83887b85d7700f860c70df88f204cc0a7df9d9017fb
+size 4806799152
diff --git a/model-00042-of-00059.safetensors b/model-00042-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e903a9cc3e318a9779584a58cecaa691b3c52254
--- /dev/null
+++ b/model-00042-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1b3b34903f58d50d83fb28c0906ebdb567525020b79162d8ef70392e8231eef
+size 4806799152
diff --git a/model-00043-of-00059.safetensors b/model-00043-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..95a76c87326bbd8510b0220a50e584a2686a3730
--- /dev/null
+++ b/model-00043-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9331cbc59ddf8277728e5fd955f0a99eb88b6a0d3d876f415abcfd3197d36543
+size 4806799152
diff --git a/model-00044-of-00059.safetensors b/model-00044-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1475ddbe1d5a9fd7a4314413bd9c32a8162cb402
--- /dev/null
+++ b/model-00044-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca409e96ac060028d1ebcf604663f2913f0ea9965bbe3d0e92d3e1772c666186
+size 4806799152
diff --git a/model-00045-of-00059.safetensors b/model-00045-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4a662b07fd7f9cfd1dd10039f1130efcf0fbd9f2
--- /dev/null
+++ b/model-00045-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0674c49ba7d91f376d0c639d0f27db271f8462d669170e33e4029cc256040926
+size 4806799152
diff --git a/model-00046-of-00059.safetensors b/model-00046-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b0e065a4a41d1f04c3df258c33a923de8b14c491
--- /dev/null
+++ b/model-00046-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b291db3505669ebb9ff38aab8fb1b8b2ec39a7841557fdbccfe045b737568ee6
+size 4806799152
diff --git a/model-00047-of-00059.safetensors b/model-00047-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a303814e7c12ec279c86ea09ed5f952564649324
--- /dev/null
+++ b/model-00047-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bb412d255b48bdb6ff41962426e30519e56313f99c7b784091f0d4ec0c46063
+size 4806799152
diff --git a/model-00048-of-00059.safetensors b/model-00048-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ccd693f0e86e86aaaf9f8057e3bd0ead4b02031e
--- /dev/null
+++ b/model-00048-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:471c7534a519ae1c525b2109b0a86aee581a46ef1f65e079ed9a3fd3869e957e
+size 4806799152
diff --git a/model-00049-of-00059.safetensors b/model-00049-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..60902fd0a84e46d264a8e1d565cb8386cd37ef6a
--- /dev/null
+++ b/model-00049-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a40597d552dffccf8d0f3a433750ddb03fd650767bb57f9ee6b2511b1906444f
+size 4806799152
diff --git a/model-00050-of-00059.safetensors b/model-00050-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fe887d374ab40a9954ea9b1e397f9fa4f69a086a
--- /dev/null
+++ b/model-00050-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f0a9ef120c1ac523fca06262c7358a9211f85014ee0429b6349956793220ec1
+size 4806799152
diff --git a/model-00051-of-00059.safetensors b/model-00051-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3f7f9cb36c55df74978ed236e64cfb0fa805a4b6
--- /dev/null
+++ b/model-00051-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c87cdee51aa56fec7f2e4a6888360c571e36c281a429efcd1ddf9350e76f4e7c
+size 4806799152
diff --git a/model-00052-of-00059.safetensors b/model-00052-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ea9f3af627f5ed34d4a4ae22330f9f52fdaeb6ee
--- /dev/null
+++ b/model-00052-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0289628b2e2f48c154d0e3c12a72849c636eb83962a12816247c42876468e51
+size 4932529864
diff --git a/model-00053-of-00059.safetensors b/model-00053-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4e60a21bb18c97abc4c32872fa71803558ca03f9
--- /dev/null
+++ b/model-00053-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48bd7b668b33feef33e8a1e0a5259b97ee611084076a763b05f01cd79f7ff060
+size 4995542848
diff --git a/model-00054-of-00059.safetensors b/model-00054-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4d3e1af4fcfdbf527f48db1d9ec3283f46464159
--- /dev/null
+++ b/model-00054-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7355bce272164d3cc957b8cbb121480c1bc55e7357d2b9f8322e109c3ee4a529
+size 4995542848
diff --git a/model-00055-of-00059.safetensors b/model-00055-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b7fc4e1e6ea1045b9d36e64eee1e5f34f44e266e
--- /dev/null
+++ b/model-00055-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb59be067ffdb6b9c75e02da743b726e665a9a279250d535410334aaec73a14b
+size 4932628288
diff --git a/model-00056-of-00059.safetensors b/model-00056-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d35cba7b220f3bf59934423e6c3e31cfb1e3bfc7
--- /dev/null
+++ b/model-00056-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dc0c6289ffd0423e14f0cfce67eaa863a09903efe8f7617fb9e54ae305c3c0c
+size 4806774344
diff --git a/model-00057-of-00059.safetensors b/model-00057-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0912be35225546340f25eaf1e7d35b9942b6668f
--- /dev/null
+++ b/model-00057-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdb58cdc3488979683886eae74075d656733b039e868cfef6cca944647f66986
+size 4806799144
diff --git a/model-00058-of-00059.safetensors b/model-00058-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..89da57a77f0eafc0c16d20ff261379f952f9c226
--- /dev/null
+++ b/model-00058-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1cff1aa0a090fab2572f2314bbb01c932218e2ae6a22e3e976f1accb69b886c
+size 4806799144
diff --git a/model-00059-of-00059.safetensors b/model-00059-of-00059.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..981b04e687bfb2364e59a8513e5a40eeb59d5c7b
--- /dev/null
+++ b/model-00059-of-00059.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29eddc396332df7782418d5c4d11351bdc9cc554943db883ee6265626018b633
+size 997233472
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..805c18819ee3c4a51164b634df4f4033c74583ad
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,1746 @@
+{
+ "metadata": {
+ "total_size": 281241268224
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00059-of-00059.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00059.safetensors",
+ "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00002-of-00059.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00059.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00059.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00059.safetensors",
+ "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00003-of-00059.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00059.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00059.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.block_sparse_moe.gate.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00012-of-00059.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00011-of-00059.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00011-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.block_sparse_moe.gate.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00013-of-00059.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00012-of-00059.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00012-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.block_sparse_moe.gate.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00014-of-00059.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00013-of-00059.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00013-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.block_sparse_moe.gate.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00015-of-00059.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00014-of-00059.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00014-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.block_sparse_moe.gate.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00016-of-00059.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00015-of-00059.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00015-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.block_sparse_moe.gate.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00017-of-00059.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00016-of-00059.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00016-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.block_sparse_moe.gate.weight": "model-00017-of-00059.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00018-of-00059.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00017-of-00059.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00017-of-00059.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00017-of-00059.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00017-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.block_sparse_moe.gate.weight": "model-00018-of-00059.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00019-of-00059.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00018-of-00059.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00018-of-00059.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00018-of-00059.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00018-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.block_sparse_moe.gate.weight": "model-00019-of-00059.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00020-of-00059.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00019-of-00059.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00019-of-00059.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00019-of-00059.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00019-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.block_sparse_moe.gate.weight": "model-00020-of-00059.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00021-of-00059.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00020-of-00059.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00020-of-00059.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00020-of-00059.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00020-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00059.safetensors",
+ "model.layers.2.block_sparse_moe.gate.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00004-of-00059.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00059.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00059.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.block_sparse_moe.gate.weight": "model-00021-of-00059.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00022-of-00059.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00021-of-00059.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00021-of-00059.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00021-of-00059.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00021-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.block_sparse_moe.gate.weight": "model-00022-of-00059.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00023-of-00059.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00022-of-00059.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00022-of-00059.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00022-of-00059.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00022-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.block_sparse_moe.gate.weight": "model-00023-of-00059.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00024-of-00059.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00023-of-00059.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00023-of-00059.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00023-of-00059.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00023-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.block_sparse_moe.gate.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00024-of-00059.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00025-of-00059.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00024-of-00059.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00024-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.block_sparse_moe.gate.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00025-of-00059.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00026-of-00059.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00025-of-00059.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00026-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.block_sparse_moe.gate.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00027-of-00059.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00026-of-00059.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00027-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00059.safetensors",
+ "model.layers.26.block_sparse_moe.gate.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00029-of-00059.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00029-of-00059.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00028-of-00059.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00028-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00059.safetensors",
+ "model.layers.27.block_sparse_moe.gate.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00030-of-00059.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00030-of-00059.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00029-of-00059.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00029-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00059.safetensors",
+ "model.layers.28.block_sparse_moe.gate.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00031-of-00059.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00031-of-00059.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00030-of-00059.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00030-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00059.safetensors",
+ "model.layers.29.block_sparse_moe.gate.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00032-of-00059.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00032-of-00059.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00031-of-00059.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00031-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00059.safetensors",
+ "model.layers.3.block_sparse_moe.gate.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00005-of-00059.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00059.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00059.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00059.safetensors",
+ "model.layers.30.block_sparse_moe.gate.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00033-of-00059.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00033-of-00059.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00032-of-00059.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00032-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00059.safetensors",
+ "model.layers.31.block_sparse_moe.gate.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00034-of-00059.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00034-of-00059.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00033-of-00059.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00033-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00035-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00035-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00035-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00035-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00035-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00035-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00035-of-00059.safetensors",
+ "model.layers.32.block_sparse_moe.gate.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00035-of-00059.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00035-of-00059.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00034-of-00059.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00034-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00036-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00059.safetensors",
+ "model.layers.33.block_sparse_moe.gate.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00036-of-00059.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00036-of-00059.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00035-of-00059.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00035-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.block_sparse_moe.gate.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00037-of-00059.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00036-of-00059.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00036-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.block_sparse_moe.gate.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00038-of-00059.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00037-of-00059.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00037-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.block_sparse_moe.gate.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00039-of-00059.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00038-of-00059.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00038-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.block_sparse_moe.gate.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00040-of-00059.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00039-of-00059.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00039-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.block_sparse_moe.gate.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00041-of-00059.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00040-of-00059.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00040-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.block_sparse_moe.gate.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00042-of-00059.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00041-of-00059.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00041-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00059.safetensors",
+ "model.layers.4.block_sparse_moe.gate.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00006-of-00059.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00006-of-00059.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00059.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.block_sparse_moe.gate.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00043-of-00059.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00042-of-00059.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00042-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.2.w1.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.2.w2.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.6.w3.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.7.w1.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.block_sparse_moe.gate.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00044-of-00059.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00043-of-00059.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00043-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.5.w2.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.5.w3.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.6.w1.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.6.w2.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.block_sparse_moe.gate.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00045-of-00059.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00044-of-00059.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00044-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.2.w1.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.2.w2.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.2.w3.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.3.w1.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.3.w2.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.3.w3.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.block_sparse_moe.gate.weight": "model-00045-of-00059.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00046-of-00059.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00045-of-00059.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00045-of-00059.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00045-of-00059.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00045-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00046-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.5.w1.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.5.w2.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.5.w3.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.6.w1.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.6.w2.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.6.w3.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.7.w1.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.7.w2.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.experts.7.w3.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.block_sparse_moe.gate.weight": "model-00046-of-00059.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00047-of-00059.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00046-of-00059.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00046-of-00059.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00046-of-00059.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00046-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.2.w1.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.2.w2.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.2.w3.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.3.w1.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.3.w2.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.6.w3.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.block_sparse_moe.gate.weight": "model-00047-of-00059.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00048-of-00059.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00047-of-00059.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00047-of-00059.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00047-of-00059.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00047-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00048-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00048-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00048-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.2.w1.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.2.w2.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.2.w3.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.3.w1.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.3.w2.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.3.w3.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.4.w1.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.4.w2.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.4.w3.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.5.w1.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.5.w2.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.5.w3.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.6.w1.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.6.w2.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.block_sparse_moe.gate.weight": "model-00048-of-00059.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00049-of-00059.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00048-of-00059.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00048-of-00059.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00048-of-00059.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00048-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.5.w2.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.5.w3.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.6.w1.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.6.w2.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.6.w3.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.7.w1.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.7.w2.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.experts.7.w3.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.block_sparse_moe.gate.weight": "model-00049-of-00059.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00050-of-00059.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00049-of-00059.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00049-of-00059.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00049-of-00059.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00049-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.3.w1.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.3.w2.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.3.w3.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.4.w1.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.4.w2.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.4.w3.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.5.w1.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.block_sparse_moe.gate.weight": "model-00050-of-00059.safetensors",
+ "model.layers.48.input_layernorm.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.post_attention_layernorm.weight": "model-00051-of-00059.safetensors",
+ "model.layers.48.self_attn.k_proj.weight": "model-00050-of-00059.safetensors",
+ "model.layers.48.self_attn.o_proj.weight": "model-00050-of-00059.safetensors",
+ "model.layers.48.self_attn.q_proj.weight": "model-00050-of-00059.safetensors",
+ "model.layers.48.self_attn.v_proj.weight": "model-00050-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.2.w1.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.2.w2.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.2.w3.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.4.w1.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.4.w2.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.4.w3.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.5.w1.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.7.w1.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.7.w2.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.experts.7.w3.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.block_sparse_moe.gate.weight": "model-00051-of-00059.safetensors",
+ "model.layers.49.input_layernorm.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.post_attention_layernorm.weight": "model-00052-of-00059.safetensors",
+ "model.layers.49.self_attn.k_proj.weight": "model-00051-of-00059.safetensors",
+ "model.layers.49.self_attn.o_proj.weight": "model-00051-of-00059.safetensors",
+ "model.layers.49.self_attn.q_proj.weight": "model-00051-of-00059.safetensors",
+ "model.layers.49.self_attn.v_proj.weight": "model-00051-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00059.safetensors",
+ "model.layers.5.block_sparse_moe.gate.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00007-of-00059.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00007-of-00059.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00006-of-00059.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00006-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.2.w3.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.3.w1.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.3.w2.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.3.w3.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.4.w1.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.4.w2.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.4.w3.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.5.w1.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.5.w2.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.5.w3.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.6.w1.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.6.w2.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.6.w3.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.7.w1.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.7.w2.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.experts.7.w3.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.block_sparse_moe.gate.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.input_layernorm.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.post_attention_layernorm.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.self_attn.k_proj.weight": "model-00052-of-00059.safetensors",
+ "model.layers.50.self_attn.o_proj.weight": "model-00053-of-00059.safetensors",
+ "model.layers.50.self_attn.q_proj.weight": "model-00052-of-00059.safetensors",
+ "model.layers.50.self_attn.v_proj.weight": "model-00052-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.2.w1.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.2.w2.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.3.w1.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.3.w2.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.3.w3.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.4.w1.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.4.w2.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.4.w3.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.5.w1.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.5.w2.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.5.w3.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.6.w1.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.6.w2.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.6.w3.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.7.w1.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.7.w2.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.experts.7.w3.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.block_sparse_moe.gate.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.input_layernorm.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.post_attention_layernorm.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.self_attn.k_proj.weight": "model-00053-of-00059.safetensors",
+ "model.layers.51.self_attn.o_proj.weight": "model-00054-of-00059.safetensors",
+ "model.layers.51.self_attn.q_proj.weight": "model-00053-of-00059.safetensors",
+ "model.layers.51.self_attn.v_proj.weight": "model-00054-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.2.w1.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.2.w2.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.2.w3.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.3.w1.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.3.w2.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.3.w3.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.4.w1.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.4.w2.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.4.w3.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.5.w1.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.6.w1.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.6.w2.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.experts.7.w3.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.block_sparse_moe.gate.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.input_layernorm.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.post_attention_layernorm.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.self_attn.k_proj.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.self_attn.o_proj.weight": "model-00055-of-00059.safetensors",
+ "model.layers.52.self_attn.q_proj.weight": "model-00054-of-00059.safetensors",
+ "model.layers.52.self_attn.v_proj.weight": "model-00055-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.3.w1.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.3.w2.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.3.w3.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.4.w1.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.4.w2.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.4.w3.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.5.w1.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.5.w2.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.5.w3.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.6.w1.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.6.w2.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.6.w3.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.7.w1.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.7.w2.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.experts.7.w3.weight": "model-00057-of-00059.safetensors",
+ "model.layers.53.block_sparse_moe.gate.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.input_layernorm.weight": "model-00057-of-00059.safetensors",
+ "model.layers.53.post_attention_layernorm.weight": "model-00057-of-00059.safetensors",
+ "model.layers.53.self_attn.k_proj.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.self_attn.o_proj.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.self_attn.q_proj.weight": "model-00056-of-00059.safetensors",
+ "model.layers.53.self_attn.v_proj.weight": "model-00056-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.2.w1.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.2.w2.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.3.w2.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.3.w3.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.4.w1.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.4.w2.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.4.w3.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.5.w1.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.5.w2.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.5.w3.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.6.w1.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.6.w2.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.6.w3.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.7.w1.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00059.safetensors",
+ "model.layers.54.block_sparse_moe.gate.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.input_layernorm.weight": "model-00058-of-00059.safetensors",
+ "model.layers.54.post_attention_layernorm.weight": "model-00058-of-00059.safetensors",
+ "model.layers.54.self_attn.k_proj.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.self_attn.o_proj.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.self_attn.q_proj.weight": "model-00057-of-00059.safetensors",
+ "model.layers.54.self_attn.v_proj.weight": "model-00057-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.2.w1.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.2.w2.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.2.w3.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.3.w1.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.3.w2.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.3.w3.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.4.w1.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.4.w2.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.4.w3.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.5.w1.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.6.w3.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.7.w1.weight": "model-00059-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.7.w2.weight": "model-00059-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.experts.7.w3.weight": "model-00059-of-00059.safetensors",
+ "model.layers.55.block_sparse_moe.gate.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.input_layernorm.weight": "model-00059-of-00059.safetensors",
+ "model.layers.55.post_attention_layernorm.weight": "model-00059-of-00059.safetensors",
+ "model.layers.55.self_attn.k_proj.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.self_attn.o_proj.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.self_attn.q_proj.weight": "model-00058-of-00059.safetensors",
+ "model.layers.55.self_attn.v_proj.weight": "model-00058-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00059.safetensors",
+ "model.layers.6.block_sparse_moe.gate.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00008-of-00059.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00008-of-00059.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00007-of-00059.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00007-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.block_sparse_moe.gate.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00009-of-00059.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00008-of-00059.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00008-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.block_sparse_moe.gate.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00010-of-00059.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00009-of-00059.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00009-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.block_sparse_moe.gate.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00011-of-00059.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00010-of-00059.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00010-of-00059.safetensors",
+ "model.norm.weight": "model-00059-of-00059.safetensors"
+ }
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..a52c50a199269393cd1548c7e6a77a654bd2001b
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,5 @@
+{
+ "bos_token": "<s>",
+ "eos_token": "</s>",
+ "unk_token": "<unk>"
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..81730cd96b4768bd1a77a4bd8269c72ea708870b
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,42 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}