nightmedia committed
Commit 3471b01 · verified · 1 parent: 2a2087c

Add files using upload-large-folder tool

README.md CHANGED
@@ -11,10 +11,6 @@ pipeline_tag: text-generation
 
 # unsloth-Qwen3-Coder-30B-A3B-Instruct-qx4-mlx
 
-test model
-
-this is part of a series created to evaluate the effect of quanting with mixed precision
-
 This model [unsloth-Qwen3-Coder-30B-A3B-Instruct-qx4-mlx](https://huggingface.co/unsloth-Qwen3-Coder-30B-A3B-Instruct-qx4-mlx) was
 converted to MLX format from [unsloth/Qwen3-Coder-30B-A3B-Instruct](https://huggingface.co/unsloth/Qwen3-Coder-30B-A3B-Instruct)
 using mlx-lm version **0.26.3**.
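Since the README points at an mlx-lm conversion, the usual way to run it is the load/generate flow that mlx-lm (>= 0.26.3, per the README) provides. A minimal sketch follows; the repo id assumes the model is published under the committer's nightmedia namespace, which this diff does not itself confirm:

```python
# Minimal sketch: load and prompt the converted model with mlx-lm.
# The repo id below is an assumption; adjust to the actual namespace.
from mlx_lm import load, generate

model, tokenizer = load("nightmedia/unsloth-Qwen3-Coder-30B-A3B-Instruct-qx4-mlx")

prompt = "Write a quicksort in Python."

# Apply the chat template when the tokenizer ships one (Qwen3 models do).
if tokenizer.chat_template is not None:
    messages = [{"role": "user", "content": prompt}]
    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

response = generate(model, tokenizer, prompt=prompt, verbose=True)
```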
config.json CHANGED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57a0225b8bbe916570a8849c38eaa7fc20b0414f528a8b1f81ad34c967b451ac
-size 5299740222
+oid sha256:e761cd56755302de9e21dcdca6c453a6a926092c593bb2fa59d1a38ac22a807e
+size 5243957509
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d23e1d0415c0747fe9cb502ad4c687ece7a314701cdb991073fe459cb54528f2
-size 5273751967
+oid sha256:7a5c492d492634f72d19fc5226f4e97c2e959f3f347d273e41ca4125380115ba
+size 5365752431
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b3d34dc0d89773bdb87ca7b249dceb89345816ee94f7772d4c6e514bcc0c340
-size 5347441648
+oid sha256:db958a069ee4b91f0403105aff116bc8a35534f30f0fd4f07b38ce0c95c21a8d
+size 5365337594
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b26dbea76f5f12408512cf823d7f6c2f05684a879e37ae78c4e32d33d77bdfa
-size 3235262240
+oid sha256:c95aea3d8d36cbf74e2b88ef1b610131bf3575b1eb4c51a4f0214fd982d8dc8e
+size 4390779495
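Each of the four shard diffs above rewrites a Git LFS pointer file, whose three fields (spec version, SHA-256 oid, byte size) fully identify the stored blob. A minimal sketch for checking a downloaded shard against its pointer, using the post-commit values for the fourth shard; `verify_lfs_pointer` is an illustrative helper, not part of any library:

```python
# Sketch: verify a downloaded shard against its Git LFS pointer fields.
import hashlib

def verify_lfs_pointer(path: str, oid: str, size: int, chunk: int = 1 << 20) -> bool:
    """Stream the file, comparing byte count and SHA-256 digest to the pointer."""
    digest = hashlib.sha256()
    total = 0
    with open(path, "rb") as f:
        while block := f.read(chunk):
            digest.update(block)
            total += len(block)
    return total == size and digest.hexdigest() == oid

# Post-commit pointer values for model-00004-of-00004.safetensors:
print(verify_lfs_pointer(
    "model-00004-of-00004.safetensors",
    oid="c95aea3d8d36cbf74e2b88ef1b610131bf3575b1eb4c51a4f0214fd982d8dc8e",
    size=4390779495,
))
```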
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 19156037632,
+    "total_size": 20365668352,
     "total_parameters": 30532122624
   },
   "weight_map": {
@@ -98,9 +98,9 @@
     "model.layers.11.mlp.gate.biases": "model-00001-of-00004.safetensors",
     "model.layers.11.mlp.gate.scales": "model-00001-of-00004.safetensors",
     "model.layers.11.mlp.gate.weight": "model-00001-of-00004.safetensors",
-    "model.layers.11.mlp.switch_mlp.down_proj.biases": "model-00001-of-00004.safetensors",
-    "model.layers.11.mlp.switch_mlp.down_proj.scales": "model-00001-of-00004.safetensors",
-    "model.layers.11.mlp.switch_mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.11.mlp.switch_mlp.down_proj.biases": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.switch_mlp.down_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.switch_mlp.down_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.11.mlp.switch_mlp.gate_proj.biases": "model-00001-of-00004.safetensors",
     "model.layers.11.mlp.switch_mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
     "model.layers.11.mlp.switch_mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
@@ -122,34 +122,34 @@
     "model.layers.11.self_attn.v_proj.biases": "model-00001-of-00004.safetensors",
     "model.layers.11.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
     "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
-    "model.layers.12.input_layernorm.weight": "model-00001-of-00004.safetensors",
-    "model.layers.12.mlp.gate.biases": "model-00001-of-00004.safetensors",
-    "model.layers.12.mlp.gate.scales": "model-00001-of-00004.safetensors",
-    "model.layers.12.mlp.gate.weight": "model-00001-of-00004.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate.biases": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate.weight": "model-00002-of-00004.safetensors",
     "model.layers.12.mlp.switch_mlp.down_proj.biases": "model-00002-of-00004.safetensors",
     "model.layers.12.mlp.switch_mlp.down_proj.scales": "model-00002-of-00004.safetensors",
     "model.layers.12.mlp.switch_mlp.down_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.12.mlp.switch_mlp.gate_proj.biases": "model-00001-of-00004.safetensors",
-    "model.layers.12.mlp.switch_mlp.gate_proj.scales": "model-00001-of-00004.safetensors",
-    "model.layers.12.mlp.switch_mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.12.mlp.switch_mlp.gate_proj.biases": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.switch_mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.switch_mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.12.mlp.switch_mlp.up_proj.biases": "model-00002-of-00004.safetensors",
     "model.layers.12.mlp.switch_mlp.up_proj.scales": "model-00002-of-00004.safetensors",
     "model.layers.12.mlp.switch_mlp.up_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.k_proj.biases": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.k_proj.scales": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.o_proj.biases": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.o_proj.scales": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.q_proj.biases": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.q_proj.scales": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.v_proj.biases": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
-    "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.biases": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.biases": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.biases": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.biases": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
     "model.layers.13.mlp.gate.biases": "model-00002-of-00004.safetensors",
     "model.layers.13.mlp.gate.scales": "model-00002-of-00004.safetensors",
@@ -490,7 +490,7 @@
     "model.layers.24.mlp.gate.biases": "model-00002-of-00004.safetensors",
     "model.layers.24.mlp.gate.scales": "model-00002-of-00004.safetensors",
     "model.layers.24.mlp.gate.weight": "model-00002-of-00004.safetensors",
-    "model.layers.24.mlp.switch_mlp.down_proj.biases": "model-00002-of-00004.safetensors",
+    "model.layers.24.mlp.switch_mlp.down_proj.biases": "model-00003-of-00004.safetensors",
     "model.layers.24.mlp.switch_mlp.down_proj.scales": "model-00002-of-00004.safetensors",
     "model.layers.24.mlp.switch_mlp.down_proj.weight": "model-00002-of-00004.safetensors",
     "model.layers.24.mlp.switch_mlp.gate_proj.biases": "model-00002-of-00004.safetensors",
@@ -514,62 +514,62 @@
     "model.layers.24.self_attn.v_proj.biases": "model-00002-of-00004.safetensors",
     "model.layers.24.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
     "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.input_layernorm.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.gate.biases": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.gate.scales": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.gate.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.switch_mlp.down_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.switch_mlp.down_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.switch_mlp.down_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.switch_mlp.gate_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.switch_mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.switch_mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.switch_mlp.up_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.switch_mlp.up_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.25.mlp.switch_mlp.up_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.k_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.o_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.q_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.v_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.26.input_layernorm.weight": "model-00002-of-00004.safetensors",
-    "model.layers.26.mlp.gate.biases": "model-00002-of-00004.safetensors",
-    "model.layers.26.mlp.gate.scales": "model-00002-of-00004.safetensors",
-    "model.layers.26.mlp.gate.weight": "model-00002-of-00004.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate.biases": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.switch_mlp.down_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.switch_mlp.down_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.switch_mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.switch_mlp.gate_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.switch_mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.switch_mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.switch_mlp.up_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.switch_mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.switch_mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate.biases": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate.weight": "model-00003-of-00004.safetensors",
     "model.layers.26.mlp.switch_mlp.down_proj.biases": "model-00003-of-00004.safetensors",
     "model.layers.26.mlp.switch_mlp.down_proj.scales": "model-00003-of-00004.safetensors",
     "model.layers.26.mlp.switch_mlp.down_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.26.mlp.switch_mlp.gate_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.26.mlp.switch_mlp.gate_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.26.mlp.switch_mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.26.mlp.switch_mlp.up_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.26.mlp.switch_mlp.up_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.26.mlp.switch_mlp.up_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.k_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.k_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.o_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.o_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.q_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.q_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.v_proj.biases": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.v_proj.scales": "model-00002-of-00004.safetensors",
-    "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.26.mlp.switch_mlp.gate_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.switch_mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.switch_mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.switch_mlp.up_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.switch_mlp.up_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.switch_mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.biases": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
     "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
     "model.layers.27.mlp.gate.biases": "model-00003-of-00004.safetensors",
     "model.layers.27.mlp.gate.scales": "model-00003-of-00004.safetensors",
@@ -910,15 +910,15 @@
     "model.layers.38.mlp.gate.biases": "model-00003-of-00004.safetensors",
     "model.layers.38.mlp.gate.scales": "model-00003-of-00004.safetensors",
     "model.layers.38.mlp.gate.weight": "model-00003-of-00004.safetensors",
-    "model.layers.38.mlp.switch_mlp.down_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.38.mlp.switch_mlp.down_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.38.mlp.switch_mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.38.mlp.switch_mlp.down_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.switch_mlp.down_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.switch_mlp.down_proj.weight": "model-00004-of-00004.safetensors",
     "model.layers.38.mlp.switch_mlp.gate_proj.biases": "model-00003-of-00004.safetensors",
     "model.layers.38.mlp.switch_mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
     "model.layers.38.mlp.switch_mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.38.mlp.switch_mlp.up_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.38.mlp.switch_mlp.up_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.38.mlp.switch_mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.38.mlp.switch_mlp.up_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.switch_mlp.up_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.38.mlp.switch_mlp.up_proj.weight": "model-00004-of-00004.safetensors",
     "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
     "model.layers.38.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
     "model.layers.38.self_attn.k_proj.biases": "model-00003-of-00004.safetensors",
@@ -934,34 +934,34 @@
     "model.layers.38.self_attn.v_proj.biases": "model-00003-of-00004.safetensors",
     "model.layers.38.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
     "model.layers.38.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.input_layernorm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.gate.biases": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.gate.scales": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.gate.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.switch_mlp.down_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.switch_mlp.down_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.switch_mlp.down_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.switch_mlp.gate_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.switch_mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.switch_mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.switch_mlp.up_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.switch_mlp.up_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.39.mlp.switch_mlp.up_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.k_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.o_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.q_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.v_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.39.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.39.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.gate.biases": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.gate.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.gate.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.switch_mlp.down_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.switch_mlp.down_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.switch_mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.switch_mlp.gate_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.switch_mlp.gate_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.switch_mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.switch_mlp.up_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.switch_mlp.up_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.mlp.switch_mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.k_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.k_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.o_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.o_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.q_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.q_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.v_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.v_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.39.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
     "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
     "model.layers.4.mlp.gate.biases": "model-00001-of-00004.safetensors",
     "model.layers.4.mlp.gate.scales": "model-00001-of-00004.safetensors",
@@ -990,38 +990,38 @@
     "model.layers.4.self_attn.v_proj.biases": "model-00001-of-00004.safetensors",
     "model.layers.4.self_attn.v_proj.scales": "model-00001-of-00004.safetensors",
     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
-    "model.layers.40.input_layernorm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.gate.biases": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.gate.scales": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.gate.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.switch_mlp.down_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.switch_mlp.down_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.switch_mlp.down_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.switch_mlp.gate_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.switch_mlp.gate_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.switch_mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.switch_mlp.up_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.switch_mlp.up_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.40.mlp.switch_mlp.up_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.k_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.o_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.q_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.v_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.40.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.41.input_layernorm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.41.mlp.gate.biases": "model-00003-of-00004.safetensors",
-    "model.layers.41.mlp.gate.scales": "model-00003-of-00004.safetensors",
-    "model.layers.41.mlp.gate.weight": "model-00003-of-00004.safetensors",
+    "model.layers.40.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.gate.biases": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.gate.scales": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.gate.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.switch_mlp.down_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.switch_mlp.down_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.switch_mlp.down_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.switch_mlp.gate_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.switch_mlp.gate_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.switch_mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.switch_mlp.up_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.switch_mlp.up_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.40.mlp.switch_mlp.up_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.k_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.k_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.o_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.o_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.q_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.q_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.v_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.v_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.40.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.41.input_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.41.mlp.gate.biases": "model-00004-of-00004.safetensors",
+    "model.layers.41.mlp.gate.scales": "model-00004-of-00004.safetensors",
+    "model.layers.41.mlp.gate.weight": "model-00004-of-00004.safetensors",
     "model.layers.41.mlp.switch_mlp.down_proj.biases": "model-00004-of-00004.safetensors",
     "model.layers.41.mlp.switch_mlp.down_proj.scales": "model-00004-of-00004.safetensors",
     "model.layers.41.mlp.switch_mlp.down_proj.weight": "model-00004-of-00004.safetensors",
@@ -1031,21 +1031,21 @@
     "model.layers.41.mlp.switch_mlp.up_proj.biases": "model-00004-of-00004.safetensors",
     "model.layers.41.mlp.switch_mlp.up_proj.scales": "model-00004-of-00004.safetensors",
     "model.layers.41.mlp.switch_mlp.up_proj.weight": "model-00004-of-00004.safetensors",
-    "model.layers.41.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.k_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.k_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.o_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.o_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.q_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.q_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.v_proj.biases": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.v_proj.scales": "model-00003-of-00004.safetensors",
-    "model.layers.41.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.41.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.k_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.k_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.o_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.o_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.q_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.q_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.v_proj.biases": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.v_proj.scales": "model-00004-of-00004.safetensors",
+    "model.layers.41.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
     "model.layers.42.input_layernorm.weight": "model-00004-of-00004.safetensors",
     "model.layers.42.mlp.gate.biases": "model-00004-of-00004.safetensors",
     "model.layers.42.mlp.gate.scales": "model-00004-of-00004.safetensors",
 