diff --git a/model-00001-of-00053.safetensors b/model-00001-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ceb56583eb4ab597e09d98b8406ea61eb0102fe4 --- /dev/null +++ b/model-00001-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b89611317d1b362b713dd2504bb51514ef50c2747f7997e06da9fd8d9419551b +size 5606008136 diff --git a/model-00002-of-00053.safetensors b/model-00002-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f1056c0e97181f6aa4b2635e5ecd91020b6e9ba --- /dev/null +++ b/model-00002-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df8ee62193e53cfb48dd2ae4b6cb9fed1bf10a74e40d1809073a9235048e1b37 +size 5606008136 diff --git a/model-00003-of-00053.safetensors b/model-00003-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51508a961aadeffb32665c5e60105fdc7ec795b5 --- /dev/null +++ b/model-00003-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed53714937cf7ac5d90c6befa024efb5771d5a90b733d6809827bcd12e92cd1 +size 5606008136 diff --git a/model-00004-of-00053.safetensors b/model-00004-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b73890a9806a3f9d655ae3252a6722557c0120c6 --- /dev/null +++ b/model-00004-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e98e6996392b4195a60b50cf350c98d486cbfada662888bf660c4354e0552066 +size 5606008136 diff --git a/model-00005-of-00053.safetensors b/model-00005-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f0a5e71ef6ba4e93aaf867536c92c4bc5d0d652 --- /dev/null +++ b/model-00005-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f53a6a93e5dfb1d1d068d792e856098de8d8524f4dfe0b8f708c2a967d81a0 +size 5606008136 diff --git a/model-00006-of-00053.safetensors b/model-00006-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..535c8888199b64b01b81dcf0b57ea6f3e1488216 --- /dev/null +++ b/model-00006-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c69f7cc1ccb8efd19374be076e70404555a8deb890deddbc1dc71938f89016 +size 5606008136 diff --git a/model-00007-of-00053.safetensors b/model-00007-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70c8f95c73f07173e3c64483666f97cc52dce63c --- /dev/null +++ b/model-00007-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95f0afd191896db7df37ffb936abdf7c99dbb9839aa53a50602192ed6237c7fa +size 5606008136 diff --git a/model-00008-of-00053.safetensors b/model-00008-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4849dd0e99ea1eac402ebca7333ba2542ef2824 --- /dev/null +++ b/model-00008-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c00b6ee3845aeca5f4b5a5344ccd5e1debcb3797f75a8f16190816e2fa3f0d0 +size 5606008136 diff --git a/model-00009-of-00053.safetensors b/model-00009-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3180f6fe6e428eab915ab4b7f004f61561f36bbe --- /dev/null +++ b/model-00009-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4812ac133a7dd2042cd588556730a95381ba39f6c34fef7adc09248d881be030 +size 5606008136 diff --git a/model-00010-of-00053.safetensors b/model-00010-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b6094f66ef0d6a5076545c1ef1538b2cd4dca7c --- /dev/null +++ b/model-00010-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:434feb33c27f59a92f32bf988cf38d57e45ca5d622815da4a3f944d2ce21168a +size 5606008136 diff --git a/model-00011-of-00053.safetensors b/model-00011-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..164f52cbd242c52c4db95d10be63513499682b24 --- /dev/null +++ b/model-00011-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fa84cb5e13876e3ef7f8c197cdb424176644b087d819932cd8a3b7c13a7169f +size 5606008192 diff --git a/model-00012-of-00053.safetensors b/model-00012-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84516dc33bc87a321e747d3049ee0e808a4840bc --- /dev/null +++ b/model-00012-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e558079385e6a7a6b36d4a4be2ed5fba5e8b108ffb58e7906f3170372caa45f8 +size 5606008192 diff --git a/model-00013-of-00053.safetensors b/model-00013-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83136ffd3cb1799262aa14472c206eaabc388774 --- /dev/null +++ b/model-00013-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a831f95fd1d017347ff3c76170353d28078ed31828ce864a4254806ad03df45 +size 5606008192 diff --git a/model-00014-of-00053.safetensors b/model-00014-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4af9f57f6e9db0ec05ef6e20293ac53b319682b5 --- /dev/null +++ b/model-00014-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3567e444c5db42a2dc652f595c64ba951235d8d8c5d114017ebeef917cd11f22 +size 5606008192 diff --git a/model-00015-of-00053.safetensors b/model-00015-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f7f85589c3b9eb4cad831260b48da0168185d538 --- /dev/null +++ b/model-00015-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e933acd7a1af342007be1aba3b630301d31108e01da1e229341e818902d6a632 +size 5606008192 diff --git a/model-00016-of-00053.safetensors b/model-00016-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e8d35cc46c9f6dbdca534a3b0ad3450dd5ab0bf --- /dev/null +++ b/model-00016-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bf6937d58157112e15a75ce2cdc37aa041eb038b132eaab24ae36ce9f5bb794 +size 5606008192 diff --git a/model-00017-of-00053.safetensors b/model-00017-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0fa4cad2aae96913751121046bff4dee471126d3 --- /dev/null +++ b/model-00017-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:792acfafa7dea0eea68440a89dfc73cb6c3b8c30345a3fa1a354f2513ee796fe +size 5606008192 diff --git a/model-00018-of-00053.safetensors b/model-00018-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee4edd42095898b195e390c60fb75a508a53206b --- /dev/null +++ b/model-00018-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c21601fc36884984f25ade1944b524fd640b645595ea306d26485fbf8116a2a5 +size 5606008192 diff --git a/model-00019-of-00053.safetensors b/model-00019-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51ca1fdf512b7f90a0c0e75dc1b8cb37d7a4e830 --- /dev/null +++ b/model-00019-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc134f22d099f2fb473016c2cd62fb262c9afede4d2f3ccbacd61aadabb1953 +size 5606008192 diff --git a/model-00020-of-00053.safetensors b/model-00020-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80dd0ad43ab85b7d9e43c385cb11b05ba5482706 --- /dev/null +++ b/model-00020-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f321f44f37f3a93943371d3473779b2e43c3ff56b2e7058a19efca16e3ef4cb +size 5606008192 diff --git a/model-00021-of-00053.safetensors b/model-00021-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8fe1bd9b097415f7c5f0eb85bac6d8a7ab8b468 --- /dev/null +++ b/model-00021-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54241f5dd77457605766ec45022adceb99573798081cb2088e328d050230521 +size 5606008192 diff --git a/model-00022-of-00053.safetensors b/model-00022-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aefa45c95bde8e4b02c7b0ad1ddfcbcdc988e85a --- /dev/null +++ b/model-00022-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d6c0776a48bb8fd59020ef2d2ee63ccea8f1ff336ff86b80d28160d388bf42c +size 5606008192 diff --git a/model-00023-of-00053.safetensors b/model-00023-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db07556f08dccaea5ab6c96d80f7821d82dabfd6 --- /dev/null +++ b/model-00023-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ead583344b1998227c42c7ce7b24700e935ebfe3e99b18e8baa0c340c30f28 +size 5606008192 diff --git a/model-00024-of-00053.safetensors b/model-00024-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29e59d8ded235f74f34d5c9e2bffc38d2aacef8b --- /dev/null +++ b/model-00024-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ac55a642649d2bb2c5dc9e72cc7ee2c91ffc260d3e828ed524ca636f962fc0 +size 5606008192 diff --git a/model-00025-of-00053.safetensors b/model-00025-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab9f59592a00ca2cecb0ab53cfaa957187a64378 --- /dev/null +++ b/model-00025-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d694188eed85a3d2530d88ba24c1c2238300e012c280048f9695b4439fb218 +size 5606008192 diff --git a/model-00026-of-00053.safetensors b/model-00026-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b52171a2d802d623ccacef76209fa73ca56f94c --- /dev/null +++ b/model-00026-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4769ee31b37775b98bf40c8459eb31f012194d580b0dc5bd59f2ed0d3c9660b7 +size 5606008192 diff --git a/model-00027-of-00053.safetensors b/model-00027-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d9e5a63bd1de17847a4bafa59c14d94b74af6bd --- /dev/null +++ b/model-00027-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4188f8c1cbf234eae80c07d90356fec9dc107fa9a6ab22bb9e0168495ea51c6 +size 5606008192 diff --git a/model-00028-of-00053.safetensors b/model-00028-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b303b1aca92bdf910f871aef2d421a880d0acffe --- /dev/null +++ b/model-00028-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca68386c9a1c206a386f8f117d288e53870e71828c92c6eabfcb7c10ef673859 +size 5606008192 diff --git a/model-00029-of-00053.safetensors b/model-00029-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..143b81f909fc49493635c86d6a2d9ce2c77776fd --- /dev/null +++ b/model-00029-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c44cdc558ce18587b6953be6eb3bc42c9d6622881ddadbd335edff00062fa63 +size 5606008192 diff --git a/model-00030-of-00053.safetensors b/model-00030-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1cc50ededbb96b903a8ece9bc6619fc168339948 --- /dev/null +++ b/model-00030-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0b8289ba42f0c045f648c22cde224e330bc2da5d43368f44c24903b0a005ab +size 5606008192 diff --git a/model-00031-of-00053.safetensors b/model-00031-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cff5f1b2d0d09812f145a0a564d001c5f33a56b --- /dev/null +++ b/model-00031-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22b85d7339f29ed3ece21dedf2401fd6cff650fb478f9da1137782bfb4a72af +size 5606008192 diff --git a/model-00032-of-00053.safetensors b/model-00032-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..282fba0e6038156e296c494ba4aaa004d977da8d --- /dev/null +++ b/model-00032-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6117fb0dd8c9d98874025d6078a35eac0c8d10d4bb237253e11ec5472b31a1cd +size 5606008192 diff --git a/model-00033-of-00053.safetensors b/model-00033-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1ab6ad56a1fd04c977ad741f2f8a870a0abad72 --- /dev/null +++ b/model-00033-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68f00c13104bf968c3fc04fd46149e0145e0539d77f6ce44455dfcd7dd59798 +size 5606008192 diff --git a/model-00034-of-00053.safetensors b/model-00034-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af96322fd70e316d962ce9a8ecb759d4705665ba --- /dev/null +++ b/model-00034-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f78c82bf3fb1459b9b397db46361889e42732eda83e8a16fbc6357f7cdc76fe +size 5606008192 diff --git a/model-00035-of-00053.safetensors b/model-00035-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdb1a943e06f4e9636f04d701bccdc05cc69c8df --- /dev/null +++ b/model-00035-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd612f62b4c6f12305e91c617e8b45b4ca8e5a477a1b1d74a382ad0d35894b7f +size 5606008192 diff --git a/model-00036-of-00053.safetensors b/model-00036-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26439af593c170e709955db6294cc0f636062836 --- /dev/null +++ b/model-00036-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996111069cbbe5bd42a98dc22a5746e026f989018c22146f375e0b0a8d900d08 +size 5606008192 diff --git a/model-00037-of-00053.safetensors b/model-00037-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1829536d5e6590f550da47baf9c0c8c87e588c7c --- /dev/null +++ b/model-00037-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5526011c319b85e129501c8a291d42ff6dfb295dcedfe2a07a0bf22740378415 +size 5606008192 diff --git a/model-00038-of-00053.safetensors b/model-00038-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0401db67b703fbaf3fded6021bd2d22097a1e0d3 --- /dev/null +++ b/model-00038-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89d5eecd3c9baf98cd5829a22b119f87c1d12a2b3c9a53d6dd64f9ecf98179cd +size 5606008192 diff --git a/model-00039-of-00053.safetensors b/model-00039-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01e2468fb19d3c296bd55388a7fe67fb12d67f8d --- /dev/null +++ b/model-00039-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d284981ae3bf2ec528e7c505c6a0298fa3f6c5c4395ad37f0ef20c77d3f3dd2d +size 5606008192 diff --git a/model-00040-of-00053.safetensors b/model-00040-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d49820364e2723c865cf07445e6c42d911a95fe8 --- /dev/null +++ b/model-00040-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ab537569c258598bb98b7da5161f02a6ba064e92653d8c4c6583848b54bb0d7 +size 5606008192 diff --git a/model-00041-of-00053.safetensors b/model-00041-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4f9e08e4994758e2004e5f6b7c22ea009c10fe6 --- /dev/null +++ b/model-00041-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81f8c4cd5e324ca2d1944687ec4f962dc1c53f33f6e204a761d2bcf04bfd9f7d +size 5606008192 diff --git a/model-00042-of-00053.safetensors b/model-00042-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f5aeeadae3ee20b7c90d9646cd8f378429aa8a3 --- /dev/null +++ b/model-00042-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7c9e588eb178c34c4e1e65d595e7254b4971ee2fb952cdc5d76a3776390eb0 +size 5606008192 diff --git a/model-00043-of-00053.safetensors b/model-00043-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c3945e9a053f6711e2b2470e972628b2f46589d --- /dev/null +++ b/model-00043-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a4fd3bcd26a08317880ebaf810337721b65aa20252c37e6a9b963aff0891574 +size 5606008192 diff --git a/model-00044-of-00053.safetensors b/model-00044-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3d4925ac0c60675fecd2b5eed07cddb6b4ef09d --- /dev/null +++ b/model-00044-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8e38b28ff14e96a16f18ac4082a56fc5006f31f351c278f0584b45c4e0c879 +size 5606008192 diff --git a/model-00045-of-00053.safetensors b/model-00045-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1fe4197dbbf384cfcddd10d3b2288ab7de59685 --- /dev/null +++ b/model-00045-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc85d321b3dfbbb133ec7d06f3b31f429bb66651da87169468bce0df89ca6aed +size 5606008192 diff --git a/model-00046-of-00053.safetensors b/model-00046-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b459fe019e9568314530cb239e02aaa725693d60 --- /dev/null +++ b/model-00046-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d1c681fb4f63e678af9f70c2c81e678292c8e4880c4471f21787c68ca9df7a0 +size 5606008192 diff --git a/model-00047-of-00053.safetensors b/model-00047-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfc8d5e08e2398e4cf409e17179a3ee7785499e0 --- /dev/null +++ b/model-00047-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc3c1e9770c5f87f1739ae1a1a6215b5f33e98761cfacc7540ba2e2c03edff9 +size 5606008192 diff --git a/model-00048-of-00053.safetensors b/model-00048-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..faf624ee7e2636cbac336b9a52702adb82fa9034 --- /dev/null +++ b/model-00048-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba9a1a0b5189f03338bdc7fc92974072e91feb06c3e709c9fc54e7a64f9a2b58 +size 5606008192 diff --git a/model-00049-of-00053.safetensors b/model-00049-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04fd1b9b89a86cc22ded36d30a1d62d4e53708c6 --- /dev/null +++ b/model-00049-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38d05405ef56debb6692eacf5f3b459660737e755038186561da534ae1f2d20 +size 5606008192 diff --git a/model-00050-of-00053.safetensors b/model-00050-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44682a71c098cadaa337b21026ab681677363524 --- /dev/null +++ b/model-00050-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4628febdaae2c7bb39b2f4c18e49dd0ea61db1045aad19a99d501f7c7c3613b3 +size 5606008192 diff --git a/model-00051-of-00053.safetensors b/model-00051-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..599aeb85d195737fc470a3eb00a2d24808f48d20 --- /dev/null +++ b/model-00051-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f4867c834021cce22a9bf111094f176602f9fedd9a535c41b7d4857d128ec76 +size 5606008192 diff --git a/model-00052-of-00053.safetensors b/model-00052-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..beeafcec5efef85037ad31ee8330bd11e4c3e9cb --- /dev/null +++ b/model-00052-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c3e770df23b2f013d439dfb31f40f8fe52a5d22f3f0ac75a75dfaa1db8cfec +size 5606008192 diff --git a/model-00053-of-00053.safetensors b/model-00053-of-00053.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e9a2a1ccb195780f2556474b59c7eed89c86332 --- /dev/null +++ b/model-00053-of-00053.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a504e0e0739e369078fec50f261bfe03993bdeb4b15ec0fcec20c49fcba6e537 +size 1207895360 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..61e6a01f90b5a0713647507ee4508e7e9df962eb --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,2922 @@ +{ + "metadata": { + "total_size": 292719941632 + }, + "weight_map": { + "model.layers.0.input_layernorm.weight": "model-00001-of-00053.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00053.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00053.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00053.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00053.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00053.safetensors", + "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00053.safetensors", + "model.layers.0.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00053.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00053.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00053.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00053.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00053.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00053.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00053.safetensors", + "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.8.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.9.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.10.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.11.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.12.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.13.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.14.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w1.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w3.weight": "model-00002-of-00053.safetensors", + "model.layers.1.block_sparse_moe.experts.15.w2.weight": "model-00002-of-00053.safetensors", + "model.layers.2.input_layernorm.weight": "model-00003-of-00053.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00053.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00053.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00053.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00053.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00053.safetensors", + "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.8.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.9.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.10.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.11.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.12.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.13.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.14.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w1.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w3.weight": "model-00003-of-00053.safetensors", + "model.layers.2.block_sparse_moe.experts.15.w2.weight": "model-00003-of-00053.safetensors", + "model.layers.3.input_layernorm.weight": "model-00004-of-00053.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00053.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00053.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00053.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00053.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00053.safetensors", + "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.8.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.9.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.10.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.11.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.12.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.13.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.14.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w1.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w3.weight": "model-00004-of-00053.safetensors", + "model.layers.3.block_sparse_moe.experts.15.w2.weight": "model-00004-of-00053.safetensors", + "model.layers.4.input_layernorm.weight": "model-00005-of-00053.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00053.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00053.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00053.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00053.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00053.safetensors", + "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.8.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.9.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.10.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.11.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.12.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.13.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.14.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w1.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w3.weight": "model-00005-of-00053.safetensors", + "model.layers.4.block_sparse_moe.experts.15.w2.weight": "model-00005-of-00053.safetensors", + "model.layers.5.input_layernorm.weight": "model-00006-of-00053.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00053.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00006-of-00053.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00006-of-00053.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00006-of-00053.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00053.safetensors", + "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.8.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.9.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.10.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.11.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.12.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.13.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.14.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w1.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w3.weight": "model-00006-of-00053.safetensors", + "model.layers.5.block_sparse_moe.experts.15.w2.weight": "model-00006-of-00053.safetensors", + "model.layers.6.input_layernorm.weight": "model-00007-of-00053.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00007-of-00053.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00007-of-00053.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00007-of-00053.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00007-of-00053.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00007-of-00053.safetensors", + "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.8.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.9.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.10.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.11.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.12.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.13.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.14.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w1.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w3.weight": "model-00007-of-00053.safetensors", + "model.layers.6.block_sparse_moe.experts.15.w2.weight": "model-00007-of-00053.safetensors", + "model.layers.7.input_layernorm.weight": "model-00008-of-00053.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00008-of-00053.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00008-of-00053.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00008-of-00053.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00008-of-00053.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00008-of-00053.safetensors", + "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.8.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.9.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.10.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.11.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.12.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.13.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.14.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w1.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w3.weight": "model-00008-of-00053.safetensors", + "model.layers.7.block_sparse_moe.experts.15.w2.weight": "model-00008-of-00053.safetensors", + "model.layers.8.input_layernorm.weight": "model-00009-of-00053.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00009-of-00053.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00009-of-00053.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00009-of-00053.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00009-of-00053.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00009-of-00053.safetensors", + "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.8.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.9.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.10.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.11.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.12.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.13.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.14.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w1.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w3.weight": "model-00009-of-00053.safetensors", + "model.layers.8.block_sparse_moe.experts.15.w2.weight": "model-00009-of-00053.safetensors", + "model.layers.9.input_layernorm.weight": "model-00010-of-00053.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00010-of-00053.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00010-of-00053.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00010-of-00053.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00010-of-00053.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00010-of-00053.safetensors", + "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.8.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.9.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.10.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.11.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.12.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.13.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.14.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w1.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w3.weight": "model-00010-of-00053.safetensors", + "model.layers.9.block_sparse_moe.experts.15.w2.weight": "model-00010-of-00053.safetensors", + "model.layers.10.input_layernorm.weight": "model-00011-of-00053.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00011-of-00053.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00011-of-00053.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00011-of-00053.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00011-of-00053.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00011-of-00053.safetensors", + "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.8.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.9.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.10.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.11.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.12.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.13.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.14.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w1.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w3.weight": "model-00011-of-00053.safetensors", + "model.layers.10.block_sparse_moe.experts.15.w2.weight": "model-00011-of-00053.safetensors", + "model.layers.11.input_layernorm.weight": "model-00012-of-00053.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00012-of-00053.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00012-of-00053.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00012-of-00053.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00012-of-00053.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00012-of-00053.safetensors", + "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.8.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.9.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.10.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.11.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.12.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.13.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.14.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w1.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w3.weight": "model-00012-of-00053.safetensors", + "model.layers.11.block_sparse_moe.experts.15.w2.weight": "model-00012-of-00053.safetensors", + "model.layers.12.input_layernorm.weight": "model-00013-of-00053.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00013-of-00053.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00013-of-00053.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00013-of-00053.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00013-of-00053.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00013-of-00053.safetensors", + "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.8.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.9.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.10.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.11.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.12.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.13.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.14.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w1.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w3.weight": "model-00013-of-00053.safetensors", + "model.layers.12.block_sparse_moe.experts.15.w2.weight": "model-00013-of-00053.safetensors", + "model.layers.13.input_layernorm.weight": "model-00014-of-00053.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00014-of-00053.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00014-of-00053.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00014-of-00053.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00014-of-00053.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00014-of-00053.safetensors", + "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.8.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.9.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.10.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.11.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.12.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.13.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.14.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w1.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w3.weight": "model-00014-of-00053.safetensors", + "model.layers.13.block_sparse_moe.experts.15.w2.weight": "model-00014-of-00053.safetensors", + "model.layers.14.input_layernorm.weight": "model-00015-of-00053.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00015-of-00053.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00015-of-00053.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00015-of-00053.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00015-of-00053.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00015-of-00053.safetensors", + "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.8.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.9.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.10.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.11.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.12.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.13.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.14.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w1.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w3.weight": "model-00015-of-00053.safetensors", + "model.layers.14.block_sparse_moe.experts.15.w2.weight": "model-00015-of-00053.safetensors", + "model.layers.15.input_layernorm.weight": "model-00016-of-00053.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00016-of-00053.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00016-of-00053.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00016-of-00053.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00016-of-00053.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00016-of-00053.safetensors", + "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.8.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.9.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.10.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.11.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.12.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.13.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.14.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w1.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w3.weight": "model-00016-of-00053.safetensors", + "model.layers.15.block_sparse_moe.experts.15.w2.weight": "model-00016-of-00053.safetensors", + "model.layers.16.input_layernorm.weight": "model-00017-of-00053.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00017-of-00053.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00017-of-00053.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00017-of-00053.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00017-of-00053.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00017-of-00053.safetensors", + "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.8.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.8.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.8.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.9.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.9.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.9.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.10.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.10.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.10.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.11.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.11.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.11.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.12.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.12.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.12.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.13.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.13.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.13.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.14.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.14.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.14.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.15.w1.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.15.w3.weight": "model-00017-of-00053.safetensors", + "model.layers.16.block_sparse_moe.experts.15.w2.weight": "model-00017-of-00053.safetensors", + "model.layers.17.input_layernorm.weight": "model-00018-of-00053.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00018-of-00053.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00018-of-00053.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00018-of-00053.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00018-of-00053.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00018-of-00053.safetensors", + "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.8.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.8.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.8.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.9.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.9.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.9.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.10.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.10.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.10.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.11.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.11.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.11.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.12.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.12.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.12.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.13.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.13.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.13.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.14.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.14.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.14.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.15.w1.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.15.w3.weight": "model-00018-of-00053.safetensors", + "model.layers.17.block_sparse_moe.experts.15.w2.weight": "model-00018-of-00053.safetensors", + "model.layers.18.input_layernorm.weight": "model-00019-of-00053.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00019-of-00053.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00019-of-00053.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00019-of-00053.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00019-of-00053.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00019-of-00053.safetensors", + "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.8.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.8.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.8.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.9.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.9.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.9.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.10.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.10.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.10.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.11.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.11.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.11.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.12.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.12.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.12.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.13.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.13.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.13.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.14.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.14.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.14.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.15.w1.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.15.w3.weight": "model-00019-of-00053.safetensors", + "model.layers.18.block_sparse_moe.experts.15.w2.weight": "model-00019-of-00053.safetensors", + "model.layers.19.input_layernorm.weight": "model-00020-of-00053.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00020-of-00053.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00020-of-00053.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00020-of-00053.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00020-of-00053.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00020-of-00053.safetensors", + "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.8.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.8.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.8.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.9.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.9.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.9.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.10.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.10.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.10.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.11.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.11.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.11.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.12.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.12.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.12.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.13.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.13.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.13.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.14.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.14.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.14.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.15.w1.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.15.w3.weight": "model-00020-of-00053.safetensors", + "model.layers.19.block_sparse_moe.experts.15.w2.weight": "model-00020-of-00053.safetensors", + "model.layers.20.input_layernorm.weight": "model-00021-of-00053.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00021-of-00053.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00021-of-00053.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00021-of-00053.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00021-of-00053.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00021-of-00053.safetensors", + "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.8.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.8.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.8.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.9.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.9.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.9.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.10.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.10.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.10.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.11.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.11.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.11.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.12.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.12.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.12.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.13.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.13.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.13.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.14.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.14.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.14.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.15.w1.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.15.w3.weight": "model-00021-of-00053.safetensors", + "model.layers.20.block_sparse_moe.experts.15.w2.weight": "model-00021-of-00053.safetensors", + "model.layers.21.input_layernorm.weight": "model-00022-of-00053.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00022-of-00053.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00022-of-00053.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00022-of-00053.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00022-of-00053.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00022-of-00053.safetensors", + "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.8.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.8.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.8.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.9.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.9.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.9.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.10.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.10.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.10.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.11.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.11.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.11.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.12.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.12.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.12.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.13.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.13.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.13.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.14.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.14.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.14.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.15.w1.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.15.w3.weight": "model-00022-of-00053.safetensors", + "model.layers.21.block_sparse_moe.experts.15.w2.weight": "model-00022-of-00053.safetensors", + "model.layers.22.input_layernorm.weight": "model-00023-of-00053.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00023-of-00053.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00023-of-00053.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00023-of-00053.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00023-of-00053.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00023-of-00053.safetensors", + "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.8.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.8.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.8.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.9.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.9.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.9.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.10.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.10.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.10.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.11.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.11.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.11.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.12.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.12.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.12.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.13.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.13.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.13.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.14.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.14.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.14.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.15.w1.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.15.w3.weight": "model-00023-of-00053.safetensors", + "model.layers.22.block_sparse_moe.experts.15.w2.weight": "model-00023-of-00053.safetensors", + "model.layers.23.input_layernorm.weight": "model-00024-of-00053.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00024-of-00053.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00024-of-00053.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00024-of-00053.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00024-of-00053.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00024-of-00053.safetensors", + "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.8.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.8.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.8.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.9.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.9.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.9.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.10.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.10.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.10.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.11.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.11.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.11.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.12.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.12.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.12.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.13.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.13.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.13.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.14.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.14.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.14.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.15.w1.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.15.w3.weight": "model-00024-of-00053.safetensors", + "model.layers.23.block_sparse_moe.experts.15.w2.weight": "model-00024-of-00053.safetensors", + "model.layers.24.input_layernorm.weight": "model-00025-of-00053.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00025-of-00053.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00025-of-00053.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00025-of-00053.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00025-of-00053.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00025-of-00053.safetensors", + "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.8.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.8.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.8.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.9.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.9.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.9.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.10.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.10.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.10.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.11.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.11.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.11.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.12.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.12.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.12.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.13.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.13.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.13.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.14.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.14.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.14.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.15.w1.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.15.w3.weight": "model-00025-of-00053.safetensors", + "model.layers.24.block_sparse_moe.experts.15.w2.weight": "model-00025-of-00053.safetensors", + "model.layers.25.input_layernorm.weight": "model-00026-of-00053.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00026-of-00053.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00026-of-00053.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00026-of-00053.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00026-of-00053.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00026-of-00053.safetensors", + "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.8.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.8.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.8.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.9.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.9.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.9.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.10.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.10.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.10.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.11.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.11.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.11.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.12.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.12.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.12.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.13.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.13.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.13.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.14.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.14.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.14.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.15.w1.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.15.w3.weight": "model-00026-of-00053.safetensors", + "model.layers.25.block_sparse_moe.experts.15.w2.weight": "model-00026-of-00053.safetensors", + "model.layers.26.input_layernorm.weight": "model-00027-of-00053.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00027-of-00053.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00027-of-00053.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00027-of-00053.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00027-of-00053.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00027-of-00053.safetensors", + "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.8.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.8.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.8.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.9.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.9.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.9.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.10.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.10.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.10.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.11.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.11.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.11.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.12.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.12.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.12.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.13.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.13.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.13.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.14.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.14.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.14.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.15.w1.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.15.w3.weight": "model-00027-of-00053.safetensors", + "model.layers.26.block_sparse_moe.experts.15.w2.weight": "model-00027-of-00053.safetensors", + "model.layers.27.input_layernorm.weight": "model-00028-of-00053.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00028-of-00053.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00028-of-00053.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00028-of-00053.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00028-of-00053.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00028-of-00053.safetensors", + "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.8.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.8.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.8.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.9.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.9.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.9.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.10.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.10.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.10.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.11.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.11.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.11.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.12.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.12.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.12.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.13.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.13.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.13.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.14.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.14.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.14.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.15.w1.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.15.w3.weight": "model-00028-of-00053.safetensors", + "model.layers.27.block_sparse_moe.experts.15.w2.weight": "model-00028-of-00053.safetensors", + "model.layers.28.input_layernorm.weight": "model-00029-of-00053.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00029-of-00053.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00029-of-00053.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00029-of-00053.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00029-of-00053.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00029-of-00053.safetensors", + "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.8.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.8.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.8.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.9.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.9.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.9.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.10.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.10.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.10.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.11.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.11.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.11.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.12.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.12.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.12.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.13.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.13.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.13.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.14.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.14.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.14.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.15.w1.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.15.w3.weight": "model-00029-of-00053.safetensors", + "model.layers.28.block_sparse_moe.experts.15.w2.weight": "model-00029-of-00053.safetensors", + "model.layers.29.input_layernorm.weight": "model-00030-of-00053.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00030-of-00053.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00030-of-00053.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00030-of-00053.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00030-of-00053.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00030-of-00053.safetensors", + "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.8.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.8.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.8.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.9.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.9.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.9.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.10.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.10.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.10.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.11.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.11.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.11.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.12.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.12.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.12.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.13.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.13.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.13.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.14.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.14.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.14.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.15.w1.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.15.w3.weight": "model-00030-of-00053.safetensors", + "model.layers.29.block_sparse_moe.experts.15.w2.weight": "model-00030-of-00053.safetensors", + "model.layers.30.input_layernorm.weight": "model-00031-of-00053.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00031-of-00053.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00031-of-00053.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00031-of-00053.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00031-of-00053.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00031-of-00053.safetensors", + "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.8.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.8.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.8.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.9.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.9.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.9.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.10.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.10.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.10.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.11.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.11.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.11.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.12.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.12.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.12.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.13.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.13.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.13.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.14.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.14.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.14.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.15.w1.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.15.w3.weight": "model-00031-of-00053.safetensors", + "model.layers.30.block_sparse_moe.experts.15.w2.weight": "model-00031-of-00053.safetensors", + "model.layers.31.input_layernorm.weight": "model-00032-of-00053.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00032-of-00053.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00032-of-00053.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00032-of-00053.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00032-of-00053.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00032-of-00053.safetensors", + "model.layers.31.self_attn.rotary_emb.inv_freq": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.8.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.8.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.8.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.9.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.9.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.9.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.10.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.10.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.10.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.11.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.11.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.11.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.12.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.12.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.12.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.13.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.13.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.13.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.14.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.14.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.14.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.15.w1.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.15.w3.weight": "model-00032-of-00053.safetensors", + "model.layers.31.block_sparse_moe.experts.15.w2.weight": "model-00032-of-00053.safetensors", + "model.layers.32.input_layernorm.weight": "model-00033-of-00053.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00033-of-00053.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00033-of-00053.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00033-of-00053.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00033-of-00053.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00033-of-00053.safetensors", + "model.layers.32.self_attn.rotary_emb.inv_freq": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.8.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.8.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.8.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.9.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.9.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.9.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.10.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.10.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.10.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.11.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.11.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.11.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.12.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.12.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.12.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.13.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.13.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.13.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.14.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.14.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.14.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.15.w1.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.15.w3.weight": "model-00033-of-00053.safetensors", + "model.layers.32.block_sparse_moe.experts.15.w2.weight": "model-00033-of-00053.safetensors", + "model.layers.33.input_layernorm.weight": "model-00034-of-00053.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00034-of-00053.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00034-of-00053.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00034-of-00053.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00034-of-00053.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00034-of-00053.safetensors", + "model.layers.33.self_attn.rotary_emb.inv_freq": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.8.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.8.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.8.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.9.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.9.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.9.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.10.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.10.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.10.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.11.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.11.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.11.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.12.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.12.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.12.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.13.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.13.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.13.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.14.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.14.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.14.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.15.w1.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.15.w3.weight": "model-00034-of-00053.safetensors", + "model.layers.33.block_sparse_moe.experts.15.w2.weight": "model-00034-of-00053.safetensors", + "model.layers.34.input_layernorm.weight": "model-00035-of-00053.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00035-of-00053.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00035-of-00053.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00035-of-00053.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00035-of-00053.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00035-of-00053.safetensors", + "model.layers.34.self_attn.rotary_emb.inv_freq": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.8.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.8.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.8.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.9.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.9.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.9.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.10.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.10.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.10.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.11.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.11.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.11.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.12.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.12.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.12.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.13.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.13.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.13.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.14.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.14.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.14.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.15.w1.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.15.w3.weight": "model-00035-of-00053.safetensors", + "model.layers.34.block_sparse_moe.experts.15.w2.weight": "model-00035-of-00053.safetensors", + "model.layers.35.input_layernorm.weight": "model-00036-of-00053.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00036-of-00053.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00036-of-00053.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00036-of-00053.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00036-of-00053.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00036-of-00053.safetensors", + "model.layers.35.self_attn.rotary_emb.inv_freq": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.8.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.8.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.8.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.9.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.9.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.9.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.10.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.10.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.10.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.11.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.11.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.11.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.12.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.12.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.12.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.13.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.13.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.13.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.14.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.14.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.14.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.15.w1.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.15.w3.weight": "model-00036-of-00053.safetensors", + "model.layers.35.block_sparse_moe.experts.15.w2.weight": "model-00036-of-00053.safetensors", + "model.layers.36.input_layernorm.weight": "model-00037-of-00053.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00037-of-00053.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00037-of-00053.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00037-of-00053.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00037-of-00053.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00037-of-00053.safetensors", + "model.layers.36.self_attn.rotary_emb.inv_freq": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.8.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.8.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.8.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.9.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.9.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.9.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.10.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.10.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.10.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.11.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.11.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.11.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.12.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.12.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.12.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.13.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.13.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.13.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.14.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.14.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.14.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.15.w1.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.15.w3.weight": "model-00037-of-00053.safetensors", + "model.layers.36.block_sparse_moe.experts.15.w2.weight": "model-00037-of-00053.safetensors", + "model.layers.37.input_layernorm.weight": "model-00038-of-00053.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00038-of-00053.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00038-of-00053.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00038-of-00053.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00038-of-00053.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00038-of-00053.safetensors", + "model.layers.37.self_attn.rotary_emb.inv_freq": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.8.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.8.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.8.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.9.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.9.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.9.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.10.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.10.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.10.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.11.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.11.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.11.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.12.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.12.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.12.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.13.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.13.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.13.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.14.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.14.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.14.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.15.w1.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.15.w3.weight": "model-00038-of-00053.safetensors", + "model.layers.37.block_sparse_moe.experts.15.w2.weight": "model-00038-of-00053.safetensors", + "model.layers.38.input_layernorm.weight": "model-00039-of-00053.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00039-of-00053.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00039-of-00053.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00039-of-00053.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00039-of-00053.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00039-of-00053.safetensors", + "model.layers.38.self_attn.rotary_emb.inv_freq": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.8.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.8.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.8.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.9.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.9.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.9.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.10.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.10.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.10.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.11.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.11.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.11.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.12.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.12.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.12.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.13.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.13.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.13.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.14.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.14.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.14.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.15.w1.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.15.w3.weight": "model-00039-of-00053.safetensors", + "model.layers.38.block_sparse_moe.experts.15.w2.weight": "model-00039-of-00053.safetensors", + "model.layers.39.input_layernorm.weight": "model-00040-of-00053.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00040-of-00053.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00040-of-00053.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00040-of-00053.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00040-of-00053.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00040-of-00053.safetensors", + "model.layers.39.self_attn.rotary_emb.inv_freq": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.8.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.8.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.8.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.9.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.9.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.9.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.10.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.10.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.10.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.11.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.11.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.11.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.12.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.12.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.12.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.13.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.13.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.13.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.14.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.14.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.14.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.15.w1.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.15.w3.weight": "model-00040-of-00053.safetensors", + "model.layers.39.block_sparse_moe.experts.15.w2.weight": "model-00040-of-00053.safetensors", + "model.layers.40.input_layernorm.weight": "model-00041-of-00053.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00041-of-00053.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00041-of-00053.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00041-of-00053.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00041-of-00053.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00041-of-00053.safetensors", + "model.layers.40.self_attn.rotary_emb.inv_freq": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.gate.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.8.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.8.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.8.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.9.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.9.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.9.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.10.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.10.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.10.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.11.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.11.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.11.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.12.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.12.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.12.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.13.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.13.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.13.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.14.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.14.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.14.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.15.w1.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.15.w3.weight": "model-00041-of-00053.safetensors", + "model.layers.40.block_sparse_moe.experts.15.w2.weight": "model-00041-of-00053.safetensors", + "model.layers.41.input_layernorm.weight": "model-00042-of-00053.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00042-of-00053.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00042-of-00053.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00042-of-00053.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00042-of-00053.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00042-of-00053.safetensors", + "model.layers.41.self_attn.rotary_emb.inv_freq": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.gate.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.8.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.8.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.8.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.9.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.9.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.9.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.10.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.10.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.10.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.11.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.11.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.11.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.12.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.12.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.12.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.13.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.13.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.13.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.14.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.14.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.14.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.15.w1.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.15.w3.weight": "model-00042-of-00053.safetensors", + "model.layers.41.block_sparse_moe.experts.15.w2.weight": "model-00042-of-00053.safetensors", + "model.layers.42.input_layernorm.weight": "model-00043-of-00053.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00043-of-00053.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00043-of-00053.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00043-of-00053.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00043-of-00053.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00043-of-00053.safetensors", + "model.layers.42.self_attn.rotary_emb.inv_freq": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.gate.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.8.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.8.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.8.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.9.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.9.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.9.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.10.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.10.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.10.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.11.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.11.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.11.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.12.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.12.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.12.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.13.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.13.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.13.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.14.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.14.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.14.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.15.w1.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.15.w3.weight": "model-00043-of-00053.safetensors", + "model.layers.42.block_sparse_moe.experts.15.w2.weight": "model-00043-of-00053.safetensors", + "model.layers.43.input_layernorm.weight": "model-00044-of-00053.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00044-of-00053.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00044-of-00053.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00044-of-00053.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00044-of-00053.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00044-of-00053.safetensors", + "model.layers.43.self_attn.rotary_emb.inv_freq": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.gate.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.8.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.8.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.8.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.9.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.9.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.9.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.10.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.10.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.10.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.11.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.11.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.11.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.12.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.12.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.12.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.13.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.13.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.13.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.14.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.14.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.14.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.15.w1.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.15.w3.weight": "model-00044-of-00053.safetensors", + "model.layers.43.block_sparse_moe.experts.15.w2.weight": "model-00044-of-00053.safetensors", + "model.layers.44.input_layernorm.weight": "model-00045-of-00053.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00045-of-00053.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00045-of-00053.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00045-of-00053.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00045-of-00053.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00045-of-00053.safetensors", + "model.layers.44.self_attn.rotary_emb.inv_freq": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.gate.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.8.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.8.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.8.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.9.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.9.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.9.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.10.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.10.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.10.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.11.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.11.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.11.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.12.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.12.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.12.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.13.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.13.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.13.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.14.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.14.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.14.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.15.w1.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.15.w3.weight": "model-00045-of-00053.safetensors", + "model.layers.44.block_sparse_moe.experts.15.w2.weight": "model-00045-of-00053.safetensors", + "model.layers.45.input_layernorm.weight": "model-00046-of-00053.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00046-of-00053.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00046-of-00053.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00046-of-00053.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00046-of-00053.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00046-of-00053.safetensors", + "model.layers.45.self_attn.rotary_emb.inv_freq": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.gate.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.8.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.8.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.8.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.9.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.9.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.9.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.10.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.10.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.10.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.11.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.11.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.11.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.12.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.12.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.12.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.13.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.13.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.13.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.14.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.14.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.14.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.15.w1.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.15.w3.weight": "model-00046-of-00053.safetensors", + "model.layers.45.block_sparse_moe.experts.15.w2.weight": "model-00046-of-00053.safetensors", + "model.layers.46.input_layernorm.weight": "model-00047-of-00053.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00047-of-00053.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00047-of-00053.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00047-of-00053.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00047-of-00053.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00047-of-00053.safetensors", + "model.layers.46.self_attn.rotary_emb.inv_freq": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.gate.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.8.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.8.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.8.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.9.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.9.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.9.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.10.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.10.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.10.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.11.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.11.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.11.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.12.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.12.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.12.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.13.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.13.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.13.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.14.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.14.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.14.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.15.w1.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.15.w3.weight": "model-00047-of-00053.safetensors", + "model.layers.46.block_sparse_moe.experts.15.w2.weight": "model-00047-of-00053.safetensors", + "model.layers.47.input_layernorm.weight": "model-00048-of-00053.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00048-of-00053.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00048-of-00053.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00048-of-00053.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00048-of-00053.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00048-of-00053.safetensors", + "model.layers.47.self_attn.rotary_emb.inv_freq": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.gate.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.8.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.8.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.8.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.9.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.9.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.9.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.10.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.10.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.10.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.11.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.11.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.11.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.12.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.12.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.12.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.13.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.13.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.13.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.14.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.14.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.14.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.15.w1.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.15.w3.weight": "model-00048-of-00053.safetensors", + "model.layers.47.block_sparse_moe.experts.15.w2.weight": "model-00048-of-00053.safetensors", + "model.layers.48.input_layernorm.weight": "model-00049-of-00053.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00049-of-00053.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00049-of-00053.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00049-of-00053.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00049-of-00053.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00049-of-00053.safetensors", + "model.layers.48.self_attn.rotary_emb.inv_freq": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.gate.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.8.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.8.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.8.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.9.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.9.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.9.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.10.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.10.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.10.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.11.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.11.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.11.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.12.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.12.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.12.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.13.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.13.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.13.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.14.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.14.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.14.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.15.w1.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.15.w3.weight": "model-00049-of-00053.safetensors", + "model.layers.48.block_sparse_moe.experts.15.w2.weight": "model-00049-of-00053.safetensors", + "model.layers.49.input_layernorm.weight": "model-00050-of-00053.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00050-of-00053.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00050-of-00053.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00050-of-00053.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00050-of-00053.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00050-of-00053.safetensors", + "model.layers.49.self_attn.rotary_emb.inv_freq": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.gate.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.8.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.8.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.8.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.9.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.9.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.9.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.10.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.10.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.10.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.11.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.11.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.11.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.12.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.12.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.12.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.13.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.13.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.13.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.14.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.14.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.14.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.15.w1.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.15.w3.weight": "model-00050-of-00053.safetensors", + "model.layers.49.block_sparse_moe.experts.15.w2.weight": "model-00050-of-00053.safetensors", + "model.layers.50.input_layernorm.weight": "model-00051-of-00053.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00051-of-00053.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00051-of-00053.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00051-of-00053.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00051-of-00053.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00051-of-00053.safetensors", + "model.layers.50.self_attn.rotary_emb.inv_freq": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.gate.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.8.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.8.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.8.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.9.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.9.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.9.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.10.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.10.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.10.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.11.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.11.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.11.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.12.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.12.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.12.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.13.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.13.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.13.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.14.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.14.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.14.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.15.w1.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.15.w3.weight": "model-00051-of-00053.safetensors", + "model.layers.50.block_sparse_moe.experts.15.w2.weight": "model-00051-of-00053.safetensors", + "model.layers.51.input_layernorm.weight": "model-00052-of-00053.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00052-of-00053.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00052-of-00053.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00052-of-00053.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00052-of-00053.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00052-of-00053.safetensors", + "model.layers.51.self_attn.rotary_emb.inv_freq": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.gate.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.8.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.8.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.8.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.9.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.9.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.9.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.10.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.10.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.10.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.11.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.11.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.11.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.12.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.12.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.12.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.13.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.13.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.13.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.14.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.14.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.14.w2.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.15.w1.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.15.w3.weight": "model-00052-of-00053.safetensors", + "model.layers.51.block_sparse_moe.experts.15.w2.weight": "model-00052-of-00053.safetensors", + "model.norm.weight": "model-00053-of-00053.safetensors", + "model.embed_tokens.weight": "model-00053-of-00053.safetensors", + "lm_head.weight": "model-00053-of-00053.safetensors" + } +} \ No newline at end of file