nytopop committed on
Commit
3c66597
·
verified ·
1 Parent(s): 558b191

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -17,8 +17,6 @@ from llmcompressor.transformers.compression.helpers import calculate_offload_dev
17
  model_id = "Qwen/Qwen3-30B-A3B"
18
  model_out = model_id.split("/")[1] + ".w4a16"
19
 
20
- recipe = QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"], dampening_frac=0.1)
21
-
22
  device_map = calculate_offload_device_map(
23
  model_id, reserve_for_hessians=False, num_gpus=1, torch_dtype="bfloat16"
24
  )
@@ -33,5 +31,11 @@ model = AutoModelForCausalLM.from_pretrained(
33
  torch_dtype="bfloat16",
34
  )
35
 
 
 
 
 
 
 
36
  oneshot(model=model, recipe=recipe, output_dir=model_out)
37
  ```
 
17
  model_id = "Qwen/Qwen3-30B-A3B"
18
  model_out = model_id.split("/")[1] + ".w4a16"
19
 
 
 
20
  device_map = calculate_offload_device_map(
21
  model_id, reserve_for_hessians=False, num_gpus=1, torch_dtype="bfloat16"
22
  )
 
31
  torch_dtype="bfloat16",
32
  )
33
 
34
+ recipe = QuantizationModifier(
35
+ targets="Linear",
36
+ scheme="W4A16",
37
+ ignore=["lm_head", "re:.*mlp.gate$", "re:.*mlp.shared_expert_gate$"],
38
+ )
39
+
40
  oneshot(model=model, recipe=recipe, output_dir=model_out)
41
  ```
config.json CHANGED
@@ -49,6 +49,54 @@
49
  "format": "pack-quantized",
50
  "global_compression_ratio": null,
51
  "ignore": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  "lm_head"
53
  ],
54
  "kv_cache_scheme": null,
 
49
  "format": "pack-quantized",
50
  "global_compression_ratio": null,
51
  "ignore": [
52
+ "model.layers.0.mlp.gate",
53
+ "model.layers.1.mlp.gate",
54
+ "model.layers.2.mlp.gate",
55
+ "model.layers.3.mlp.gate",
56
+ "model.layers.4.mlp.gate",
57
+ "model.layers.5.mlp.gate",
58
+ "model.layers.6.mlp.gate",
59
+ "model.layers.7.mlp.gate",
60
+ "model.layers.8.mlp.gate",
61
+ "model.layers.9.mlp.gate",
62
+ "model.layers.10.mlp.gate",
63
+ "model.layers.11.mlp.gate",
64
+ "model.layers.12.mlp.gate",
65
+ "model.layers.13.mlp.gate",
66
+ "model.layers.14.mlp.gate",
67
+ "model.layers.15.mlp.gate",
68
+ "model.layers.16.mlp.gate",
69
+ "model.layers.17.mlp.gate",
70
+ "model.layers.18.mlp.gate",
71
+ "model.layers.19.mlp.gate",
72
+ "model.layers.20.mlp.gate",
73
+ "model.layers.21.mlp.gate",
74
+ "model.layers.22.mlp.gate",
75
+ "model.layers.23.mlp.gate",
76
+ "model.layers.24.mlp.gate",
77
+ "model.layers.25.mlp.gate",
78
+ "model.layers.26.mlp.gate",
79
+ "model.layers.27.mlp.gate",
80
+ "model.layers.28.mlp.gate",
81
+ "model.layers.29.mlp.gate",
82
+ "model.layers.30.mlp.gate",
83
+ "model.layers.31.mlp.gate",
84
+ "model.layers.32.mlp.gate",
85
+ "model.layers.33.mlp.gate",
86
+ "model.layers.34.mlp.gate",
87
+ "model.layers.35.mlp.gate",
88
+ "model.layers.36.mlp.gate",
89
+ "model.layers.37.mlp.gate",
90
+ "model.layers.38.mlp.gate",
91
+ "model.layers.39.mlp.gate",
92
+ "model.layers.40.mlp.gate",
93
+ "model.layers.41.mlp.gate",
94
+ "model.layers.42.mlp.gate",
95
+ "model.layers.43.mlp.gate",
96
+ "model.layers.44.mlp.gate",
97
+ "model.layers.45.mlp.gate",
98
+ "model.layers.46.mlp.gate",
99
+ "model.layers.47.mlp.gate",
100
  "lm_head"
101
  ],
102
  "kv_cache_scheme": null,
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7057a9d4836c1931f7b026d075710e8171a30edd3312dce25a528540558a3ff7
3
- size 5001759432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8367c13e9891801c60ef7f26060d972d1ea79e43fc4e87705106c1fd660e403a
3
+ size 5001524144
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f79e6c9077f477aa405e6fd39e022e55c7915f57c2eae1c57cac5810e26abcf2
3
- size 5002072232
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c899896142c15978505e045a1935314fefd82781b162075487b181e61cf11e2b
3
+ size 5001803304
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7198692a85de129d879adb808ee2b0dcc22463439c34ff4109d9fc4faf9e1520
3
- size 5001930568
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85fdd3a5b3b209608b1818cbb45de6e6a9db7fa57c3db3f73641af34a830e3b9
3
+ size 5002084152
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcd554978c38ba92b4e0d95d3241891667aa1a219dfa7645b6e2dd3a5e6b2c7f
3
- size 1668650384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c9a3ff0bd6804117efaa57d01be938f5c27ba865639c27010d18cb579df36ab
3
+ size 1687667728
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 16667508480
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00004-of-00004.safetensors",
@@ -1158,9 +1158,7 @@
1158
  "model.layers.0.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
1159
  "model.layers.0.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
1160
  "model.layers.0.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
1161
- "model.layers.0.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
1162
- "model.layers.0.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
1163
- "model.layers.0.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
1164
  "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
1165
  "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
1166
  "model.layers.0.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -2329,9 +2327,7 @@
2329
  "model.layers.1.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
2330
  "model.layers.1.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
2331
  "model.layers.1.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
2332
- "model.layers.1.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
2333
- "model.layers.1.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
2334
- "model.layers.1.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
2335
  "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
2336
  "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
2337
  "model.layers.1.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -3500,9 +3496,7 @@
3500
  "model.layers.10.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
3501
  "model.layers.10.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
3502
  "model.layers.10.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
3503
- "model.layers.10.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
3504
- "model.layers.10.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
3505
- "model.layers.10.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
3506
  "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
3507
  "model.layers.10.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
3508
  "model.layers.10.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -4671,9 +4665,7 @@
4671
  "model.layers.11.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
4672
  "model.layers.11.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
4673
  "model.layers.11.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
4674
- "model.layers.11.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
4675
- "model.layers.11.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
4676
- "model.layers.11.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
4677
  "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
4678
  "model.layers.11.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
4679
  "model.layers.11.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -5842,9 +5834,7 @@
5842
  "model.layers.12.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
5843
  "model.layers.12.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
5844
  "model.layers.12.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
5845
- "model.layers.12.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
5846
- "model.layers.12.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
5847
- "model.layers.12.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
5848
  "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
5849
  "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
5850
  "model.layers.12.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -6779,33 +6769,33 @@
6779
  "model.layers.13.mlp.experts.75.up_proj.weight_packed": "model-00001-of-00004.safetensors",
6780
  "model.layers.13.mlp.experts.75.up_proj.weight_scale": "model-00001-of-00004.safetensors",
6781
  "model.layers.13.mlp.experts.75.up_proj.weight_shape": "model-00001-of-00004.safetensors",
6782
- "model.layers.13.mlp.experts.76.down_proj.weight_packed": "model-00001-of-00004.safetensors",
6783
- "model.layers.13.mlp.experts.76.down_proj.weight_scale": "model-00001-of-00004.safetensors",
6784
- "model.layers.13.mlp.experts.76.down_proj.weight_shape": "model-00001-of-00004.safetensors",
6785
- "model.layers.13.mlp.experts.76.gate_proj.weight_packed": "model-00001-of-00004.safetensors",
6786
- "model.layers.13.mlp.experts.76.gate_proj.weight_scale": "model-00001-of-00004.safetensors",
6787
  "model.layers.13.mlp.experts.76.gate_proj.weight_shape": "model-00001-of-00004.safetensors",
6788
- "model.layers.13.mlp.experts.76.up_proj.weight_packed": "model-00001-of-00004.safetensors",
6789
- "model.layers.13.mlp.experts.76.up_proj.weight_scale": "model-00001-of-00004.safetensors",
6790
- "model.layers.13.mlp.experts.76.up_proj.weight_shape": "model-00001-of-00004.safetensors",
6791
- "model.layers.13.mlp.experts.77.down_proj.weight_packed": "model-00001-of-00004.safetensors",
6792
- "model.layers.13.mlp.experts.77.down_proj.weight_scale": "model-00001-of-00004.safetensors",
6793
- "model.layers.13.mlp.experts.77.down_proj.weight_shape": "model-00001-of-00004.safetensors",
6794
- "model.layers.13.mlp.experts.77.gate_proj.weight_packed": "model-00001-of-00004.safetensors",
6795
- "model.layers.13.mlp.experts.77.gate_proj.weight_scale": "model-00001-of-00004.safetensors",
6796
- "model.layers.13.mlp.experts.77.gate_proj.weight_shape": "model-00001-of-00004.safetensors",
6797
- "model.layers.13.mlp.experts.77.up_proj.weight_packed": "model-00001-of-00004.safetensors",
6798
- "model.layers.13.mlp.experts.77.up_proj.weight_scale": "model-00001-of-00004.safetensors",
6799
- "model.layers.13.mlp.experts.77.up_proj.weight_shape": "model-00001-of-00004.safetensors",
6800
  "model.layers.13.mlp.experts.78.down_proj.weight_packed": "model-00002-of-00004.safetensors",
6801
  "model.layers.13.mlp.experts.78.down_proj.weight_scale": "model-00002-of-00004.safetensors",
6802
  "model.layers.13.mlp.experts.78.down_proj.weight_shape": "model-00002-of-00004.safetensors",
6803
- "model.layers.13.mlp.experts.78.gate_proj.weight_packed": "model-00001-of-00004.safetensors",
6804
- "model.layers.13.mlp.experts.78.gate_proj.weight_scale": "model-00001-of-00004.safetensors",
6805
- "model.layers.13.mlp.experts.78.gate_proj.weight_shape": "model-00001-of-00004.safetensors",
6806
  "model.layers.13.mlp.experts.78.up_proj.weight_packed": "model-00002-of-00004.safetensors",
6807
  "model.layers.13.mlp.experts.78.up_proj.weight_scale": "model-00002-of-00004.safetensors",
6808
- "model.layers.13.mlp.experts.78.up_proj.weight_shape": "model-00001-of-00004.safetensors",
6809
  "model.layers.13.mlp.experts.79.down_proj.weight_packed": "model-00002-of-00004.safetensors",
6810
  "model.layers.13.mlp.experts.79.down_proj.weight_scale": "model-00002-of-00004.safetensors",
6811
  "model.layers.13.mlp.experts.79.down_proj.weight_shape": "model-00002-of-00004.safetensors",
@@ -7013,9 +7003,7 @@
7013
  "model.layers.13.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
7014
  "model.layers.13.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
7015
  "model.layers.13.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
7016
- "model.layers.13.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
7017
- "model.layers.13.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
7018
- "model.layers.13.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
7019
  "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
7020
  "model.layers.13.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
7021
  "model.layers.13.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -8184,9 +8172,7 @@
8184
  "model.layers.14.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
8185
  "model.layers.14.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
8186
  "model.layers.14.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
8187
- "model.layers.14.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
8188
- "model.layers.14.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
8189
- "model.layers.14.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
8190
  "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
8191
  "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
8192
  "model.layers.14.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -9355,9 +9341,7 @@
9355
  "model.layers.15.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
9356
  "model.layers.15.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
9357
  "model.layers.15.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
9358
- "model.layers.15.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
9359
- "model.layers.15.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
9360
- "model.layers.15.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
9361
  "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
9362
  "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
9363
  "model.layers.15.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -10526,9 +10510,7 @@
10526
  "model.layers.16.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
10527
  "model.layers.16.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
10528
  "model.layers.16.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
10529
- "model.layers.16.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
10530
- "model.layers.16.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
10531
- "model.layers.16.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
10532
  "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
10533
  "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
10534
  "model.layers.16.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -11697,9 +11679,7 @@
11697
  "model.layers.17.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
11698
  "model.layers.17.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
11699
  "model.layers.17.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
11700
- "model.layers.17.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
11701
- "model.layers.17.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
11702
- "model.layers.17.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
11703
  "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
11704
  "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
11705
  "model.layers.17.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -12868,9 +12848,7 @@
12868
  "model.layers.18.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
12869
  "model.layers.18.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
12870
  "model.layers.18.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
12871
- "model.layers.18.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
12872
- "model.layers.18.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
12873
- "model.layers.18.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
12874
  "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
12875
  "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
12876
  "model.layers.18.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -14039,9 +14017,7 @@
14039
  "model.layers.19.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
14040
  "model.layers.19.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
14041
  "model.layers.19.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
14042
- "model.layers.19.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
14043
- "model.layers.19.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
14044
- "model.layers.19.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
14045
  "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
14046
  "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
14047
  "model.layers.19.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -15210,9 +15186,7 @@
15210
  "model.layers.2.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
15211
  "model.layers.2.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
15212
  "model.layers.2.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
15213
- "model.layers.2.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
15214
- "model.layers.2.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
15215
- "model.layers.2.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
15216
  "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
15217
  "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
15218
  "model.layers.2.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -16381,9 +16355,7 @@
16381
  "model.layers.20.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
16382
  "model.layers.20.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
16383
  "model.layers.20.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
16384
- "model.layers.20.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
16385
- "model.layers.20.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
16386
- "model.layers.20.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
16387
  "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
16388
  "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
16389
  "model.layers.20.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -17552,9 +17524,7 @@
17552
  "model.layers.21.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
17553
  "model.layers.21.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
17554
  "model.layers.21.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
17555
- "model.layers.21.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
17556
- "model.layers.21.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
17557
- "model.layers.21.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
17558
  "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
17559
  "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
17560
  "model.layers.21.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -18723,9 +18693,7 @@
18723
  "model.layers.22.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
18724
  "model.layers.22.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
18725
  "model.layers.22.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
18726
- "model.layers.22.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
18727
- "model.layers.22.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
18728
- "model.layers.22.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
18729
  "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
18730
  "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
18731
  "model.layers.22.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -19894,9 +19862,7 @@
19894
  "model.layers.23.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
19895
  "model.layers.23.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
19896
  "model.layers.23.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
19897
- "model.layers.23.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
19898
- "model.layers.23.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
19899
- "model.layers.23.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
19900
  "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
19901
  "model.layers.23.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
19902
  "model.layers.23.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -21065,9 +21031,7 @@
21065
  "model.layers.24.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
21066
  "model.layers.24.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
21067
  "model.layers.24.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
21068
- "model.layers.24.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
21069
- "model.layers.24.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
21070
- "model.layers.24.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
21071
  "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
21072
  "model.layers.24.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
21073
  "model.layers.24.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -22236,9 +22200,7 @@
22236
  "model.layers.25.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
22237
  "model.layers.25.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
22238
  "model.layers.25.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
22239
- "model.layers.25.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
22240
- "model.layers.25.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
22241
- "model.layers.25.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
22242
  "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
22243
  "model.layers.25.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
22244
  "model.layers.25.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -23407,9 +23369,7 @@
23407
  "model.layers.26.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
23408
  "model.layers.26.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
23409
  "model.layers.26.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
23410
- "model.layers.26.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
23411
- "model.layers.26.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
23412
- "model.layers.26.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
23413
  "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
23414
  "model.layers.26.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
23415
  "model.layers.26.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -24578,9 +24538,7 @@
24578
  "model.layers.27.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
24579
  "model.layers.27.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
24580
  "model.layers.27.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
24581
- "model.layers.27.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
24582
- "model.layers.27.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
24583
- "model.layers.27.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
24584
  "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
24585
  "model.layers.27.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
24586
  "model.layers.27.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -25749,9 +25707,7 @@
25749
  "model.layers.28.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
25750
  "model.layers.28.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
25751
  "model.layers.28.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
25752
- "model.layers.28.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
25753
- "model.layers.28.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
25754
- "model.layers.28.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
25755
  "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
25756
  "model.layers.28.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
25757
  "model.layers.28.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -26083,51 +26039,51 @@
26083
  "model.layers.29.mlp.experts.14.up_proj.weight_packed": "model-00002-of-00004.safetensors",
26084
  "model.layers.29.mlp.experts.14.up_proj.weight_scale": "model-00002-of-00004.safetensors",
26085
  "model.layers.29.mlp.experts.14.up_proj.weight_shape": "model-00002-of-00004.safetensors",
26086
- "model.layers.29.mlp.experts.15.down_proj.weight_packed": "model-00002-of-00004.safetensors",
26087
- "model.layers.29.mlp.experts.15.down_proj.weight_scale": "model-00002-of-00004.safetensors",
26088
- "model.layers.29.mlp.experts.15.down_proj.weight_shape": "model-00002-of-00004.safetensors",
26089
  "model.layers.29.mlp.experts.15.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
26090
  "model.layers.29.mlp.experts.15.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
26091
  "model.layers.29.mlp.experts.15.gate_proj.weight_shape": "model-00002-of-00004.safetensors",
26092
- "model.layers.29.mlp.experts.15.up_proj.weight_packed": "model-00002-of-00004.safetensors",
26093
- "model.layers.29.mlp.experts.15.up_proj.weight_scale": "model-00002-of-00004.safetensors",
26094
  "model.layers.29.mlp.experts.15.up_proj.weight_shape": "model-00002-of-00004.safetensors",
26095
- "model.layers.29.mlp.experts.16.down_proj.weight_packed": "model-00002-of-00004.safetensors",
26096
- "model.layers.29.mlp.experts.16.down_proj.weight_scale": "model-00002-of-00004.safetensors",
26097
- "model.layers.29.mlp.experts.16.down_proj.weight_shape": "model-00002-of-00004.safetensors",
26098
- "model.layers.29.mlp.experts.16.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
26099
- "model.layers.29.mlp.experts.16.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
26100
- "model.layers.29.mlp.experts.16.gate_proj.weight_shape": "model-00002-of-00004.safetensors",
26101
- "model.layers.29.mlp.experts.16.up_proj.weight_packed": "model-00002-of-00004.safetensors",
26102
- "model.layers.29.mlp.experts.16.up_proj.weight_scale": "model-00002-of-00004.safetensors",
26103
- "model.layers.29.mlp.experts.16.up_proj.weight_shape": "model-00002-of-00004.safetensors",
26104
- "model.layers.29.mlp.experts.17.down_proj.weight_packed": "model-00002-of-00004.safetensors",
26105
- "model.layers.29.mlp.experts.17.down_proj.weight_scale": "model-00002-of-00004.safetensors",
26106
- "model.layers.29.mlp.experts.17.down_proj.weight_shape": "model-00002-of-00004.safetensors",
26107
- "model.layers.29.mlp.experts.17.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
26108
- "model.layers.29.mlp.experts.17.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
26109
- "model.layers.29.mlp.experts.17.gate_proj.weight_shape": "model-00002-of-00004.safetensors",
26110
- "model.layers.29.mlp.experts.17.up_proj.weight_packed": "model-00002-of-00004.safetensors",
26111
- "model.layers.29.mlp.experts.17.up_proj.weight_scale": "model-00002-of-00004.safetensors",
26112
- "model.layers.29.mlp.experts.17.up_proj.weight_shape": "model-00002-of-00004.safetensors",
26113
- "model.layers.29.mlp.experts.18.down_proj.weight_packed": "model-00002-of-00004.safetensors",
26114
- "model.layers.29.mlp.experts.18.down_proj.weight_scale": "model-00002-of-00004.safetensors",
26115
- "model.layers.29.mlp.experts.18.down_proj.weight_shape": "model-00002-of-00004.safetensors",
26116
- "model.layers.29.mlp.experts.18.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
26117
- "model.layers.29.mlp.experts.18.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
26118
- "model.layers.29.mlp.experts.18.gate_proj.weight_shape": "model-00002-of-00004.safetensors",
26119
- "model.layers.29.mlp.experts.18.up_proj.weight_packed": "model-00002-of-00004.safetensors",
26120
- "model.layers.29.mlp.experts.18.up_proj.weight_scale": "model-00002-of-00004.safetensors",
26121
- "model.layers.29.mlp.experts.18.up_proj.weight_shape": "model-00002-of-00004.safetensors",
26122
- "model.layers.29.mlp.experts.19.down_proj.weight_packed": "model-00002-of-00004.safetensors",
26123
- "model.layers.29.mlp.experts.19.down_proj.weight_scale": "model-00002-of-00004.safetensors",
26124
- "model.layers.29.mlp.experts.19.down_proj.weight_shape": "model-00002-of-00004.safetensors",
26125
- "model.layers.29.mlp.experts.19.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
26126
- "model.layers.29.mlp.experts.19.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
26127
- "model.layers.29.mlp.experts.19.gate_proj.weight_shape": "model-00002-of-00004.safetensors",
26128
- "model.layers.29.mlp.experts.19.up_proj.weight_packed": "model-00002-of-00004.safetensors",
26129
- "model.layers.29.mlp.experts.19.up_proj.weight_scale": "model-00002-of-00004.safetensors",
26130
- "model.layers.29.mlp.experts.19.up_proj.weight_shape": "model-00002-of-00004.safetensors",
26131
  "model.layers.29.mlp.experts.2.down_proj.weight_packed": "model-00002-of-00004.safetensors",
26132
  "model.layers.29.mlp.experts.2.down_proj.weight_scale": "model-00002-of-00004.safetensors",
26133
  "model.layers.29.mlp.experts.2.down_proj.weight_shape": "model-00002-of-00004.safetensors",
@@ -26140,12 +26096,12 @@
26140
  "model.layers.29.mlp.experts.20.down_proj.weight_packed": "model-00003-of-00004.safetensors",
26141
  "model.layers.29.mlp.experts.20.down_proj.weight_scale": "model-00003-of-00004.safetensors",
26142
  "model.layers.29.mlp.experts.20.down_proj.weight_shape": "model-00003-of-00004.safetensors",
26143
- "model.layers.29.mlp.experts.20.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
26144
- "model.layers.29.mlp.experts.20.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
26145
- "model.layers.29.mlp.experts.20.gate_proj.weight_shape": "model-00002-of-00004.safetensors",
26146
  "model.layers.29.mlp.experts.20.up_proj.weight_packed": "model-00003-of-00004.safetensors",
26147
  "model.layers.29.mlp.experts.20.up_proj.weight_scale": "model-00003-of-00004.safetensors",
26148
- "model.layers.29.mlp.experts.20.up_proj.weight_shape": "model-00002-of-00004.safetensors",
26149
  "model.layers.29.mlp.experts.21.down_proj.weight_packed": "model-00003-of-00004.safetensors",
26150
  "model.layers.29.mlp.experts.21.down_proj.weight_scale": "model-00003-of-00004.safetensors",
26151
  "model.layers.29.mlp.experts.21.down_proj.weight_shape": "model-00003-of-00004.safetensors",
@@ -26920,9 +26876,7 @@
26920
  "model.layers.29.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
26921
  "model.layers.29.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
26922
  "model.layers.29.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
26923
- "model.layers.29.mlp.gate.weight_packed": "model-00002-of-00004.safetensors",
26924
- "model.layers.29.mlp.gate.weight_scale": "model-00002-of-00004.safetensors",
26925
- "model.layers.29.mlp.gate.weight_shape": "model-00002-of-00004.safetensors",
26926
  "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
26927
  "model.layers.29.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
26928
  "model.layers.29.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
@@ -28091,9 +28045,7 @@
28091
  "model.layers.3.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
28092
  "model.layers.3.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
28093
  "model.layers.3.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
28094
- "model.layers.3.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
28095
- "model.layers.3.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
28096
- "model.layers.3.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
28097
  "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
28098
  "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
28099
  "model.layers.3.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -29262,9 +29214,7 @@
29262
  "model.layers.30.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
29263
  "model.layers.30.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
29264
  "model.layers.30.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
29265
- "model.layers.30.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
29266
- "model.layers.30.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
29267
- "model.layers.30.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
29268
  "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
29269
  "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
29270
  "model.layers.30.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -30433,9 +30383,7 @@
30433
  "model.layers.31.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
30434
  "model.layers.31.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
30435
  "model.layers.31.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
30436
- "model.layers.31.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
30437
- "model.layers.31.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
30438
- "model.layers.31.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
30439
  "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
30440
  "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
30441
  "model.layers.31.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -31604,9 +31552,7 @@
31604
  "model.layers.32.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
31605
  "model.layers.32.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
31606
  "model.layers.32.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
31607
- "model.layers.32.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
31608
- "model.layers.32.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
31609
- "model.layers.32.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
31610
  "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
31611
  "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
31612
  "model.layers.32.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -32775,9 +32721,7 @@
32775
  "model.layers.33.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
32776
  "model.layers.33.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
32777
  "model.layers.33.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
32778
- "model.layers.33.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
32779
- "model.layers.33.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
32780
- "model.layers.33.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
32781
  "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
32782
  "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
32783
  "model.layers.33.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -33946,9 +33890,7 @@
33946
  "model.layers.34.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
33947
  "model.layers.34.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
33948
  "model.layers.34.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
33949
- "model.layers.34.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
33950
- "model.layers.34.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
33951
- "model.layers.34.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
33952
  "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
33953
  "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
33954
  "model.layers.34.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -35117,9 +35059,7 @@
35117
  "model.layers.35.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
35118
  "model.layers.35.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
35119
  "model.layers.35.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
35120
- "model.layers.35.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
35121
- "model.layers.35.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
35122
- "model.layers.35.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
35123
  "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
35124
  "model.layers.35.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
35125
  "model.layers.35.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -36288,9 +36228,7 @@
36288
  "model.layers.36.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
36289
  "model.layers.36.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
36290
  "model.layers.36.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
36291
- "model.layers.36.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
36292
- "model.layers.36.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
36293
- "model.layers.36.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
36294
  "model.layers.36.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
36295
  "model.layers.36.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
36296
  "model.layers.36.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -37459,9 +37397,7 @@
37459
  "model.layers.37.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
37460
  "model.layers.37.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
37461
  "model.layers.37.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
37462
- "model.layers.37.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
37463
- "model.layers.37.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
37464
- "model.layers.37.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
37465
  "model.layers.37.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
37466
  "model.layers.37.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
37467
  "model.layers.37.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -38630,9 +38566,7 @@
38630
  "model.layers.38.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
38631
  "model.layers.38.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
38632
  "model.layers.38.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
38633
- "model.layers.38.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
38634
- "model.layers.38.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
38635
- "model.layers.38.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
38636
  "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
38637
  "model.layers.38.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
38638
  "model.layers.38.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -39801,9 +39735,7 @@
39801
  "model.layers.39.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
39802
  "model.layers.39.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
39803
  "model.layers.39.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
39804
- "model.layers.39.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
39805
- "model.layers.39.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
39806
- "model.layers.39.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
39807
  "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
39808
  "model.layers.39.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
39809
  "model.layers.39.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -40972,9 +40904,7 @@
40972
  "model.layers.4.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
40973
  "model.layers.4.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
40974
  "model.layers.4.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
40975
- "model.layers.4.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
40976
- "model.layers.4.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
40977
- "model.layers.4.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
40978
  "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
40979
  "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
40980
  "model.layers.4.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -42143,9 +42073,7 @@
42143
  "model.layers.40.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
42144
  "model.layers.40.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
42145
  "model.layers.40.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
42146
- "model.layers.40.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
42147
- "model.layers.40.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
42148
- "model.layers.40.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
42149
  "model.layers.40.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
42150
  "model.layers.40.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
42151
  "model.layers.40.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -43314,9 +43242,7 @@
43314
  "model.layers.41.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
43315
  "model.layers.41.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
43316
  "model.layers.41.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
43317
- "model.layers.41.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
43318
- "model.layers.41.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
43319
- "model.layers.41.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
43320
  "model.layers.41.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
43321
  "model.layers.41.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
43322
  "model.layers.41.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -44485,9 +44411,7 @@
44485
  "model.layers.42.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
44486
  "model.layers.42.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
44487
  "model.layers.42.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
44488
- "model.layers.42.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
44489
- "model.layers.42.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
44490
- "model.layers.42.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
44491
  "model.layers.42.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
44492
  "model.layers.42.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
44493
  "model.layers.42.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -45656,9 +45580,7 @@
45656
  "model.layers.43.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
45657
  "model.layers.43.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
45658
  "model.layers.43.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
45659
- "model.layers.43.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
45660
- "model.layers.43.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
45661
- "model.layers.43.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
45662
  "model.layers.43.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
45663
  "model.layers.43.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
45664
  "model.layers.43.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -46701,33 +46623,33 @@
46701
  "model.layers.44.mlp.experts.86.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46702
  "model.layers.44.mlp.experts.86.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46703
  "model.layers.44.mlp.experts.86.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46704
- "model.layers.44.mlp.experts.87.down_proj.weight_packed": "model-00003-of-00004.safetensors",
46705
- "model.layers.44.mlp.experts.87.down_proj.weight_scale": "model-00003-of-00004.safetensors",
46706
- "model.layers.44.mlp.experts.87.down_proj.weight_shape": "model-00003-of-00004.safetensors",
46707
- "model.layers.44.mlp.experts.87.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
46708
- "model.layers.44.mlp.experts.87.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
46709
  "model.layers.44.mlp.experts.87.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
46710
- "model.layers.44.mlp.experts.87.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46711
- "model.layers.44.mlp.experts.87.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46712
- "model.layers.44.mlp.experts.87.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46713
- "model.layers.44.mlp.experts.88.down_proj.weight_packed": "model-00003-of-00004.safetensors",
46714
- "model.layers.44.mlp.experts.88.down_proj.weight_scale": "model-00003-of-00004.safetensors",
46715
- "model.layers.44.mlp.experts.88.down_proj.weight_shape": "model-00003-of-00004.safetensors",
46716
- "model.layers.44.mlp.experts.88.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
46717
- "model.layers.44.mlp.experts.88.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
46718
- "model.layers.44.mlp.experts.88.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
46719
- "model.layers.44.mlp.experts.88.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46720
- "model.layers.44.mlp.experts.88.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46721
- "model.layers.44.mlp.experts.88.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46722
- "model.layers.44.mlp.experts.89.down_proj.weight_packed": "model-00003-of-00004.safetensors",
46723
- "model.layers.44.mlp.experts.89.down_proj.weight_scale": "model-00003-of-00004.safetensors",
46724
- "model.layers.44.mlp.experts.89.down_proj.weight_shape": "model-00003-of-00004.safetensors",
46725
- "model.layers.44.mlp.experts.89.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
46726
- "model.layers.44.mlp.experts.89.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
46727
- "model.layers.44.mlp.experts.89.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
46728
- "model.layers.44.mlp.experts.89.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46729
- "model.layers.44.mlp.experts.89.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46730
- "model.layers.44.mlp.experts.89.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46731
  "model.layers.44.mlp.experts.9.down_proj.weight_packed": "model-00003-of-00004.safetensors",
46732
  "model.layers.44.mlp.experts.9.down_proj.weight_scale": "model-00003-of-00004.safetensors",
46733
  "model.layers.44.mlp.experts.9.down_proj.weight_shape": "model-00003-of-00004.safetensors",
@@ -46737,51 +46659,51 @@
46737
  "model.layers.44.mlp.experts.9.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46738
  "model.layers.44.mlp.experts.9.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46739
  "model.layers.44.mlp.experts.9.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46740
- "model.layers.44.mlp.experts.90.down_proj.weight_packed": "model-00003-of-00004.safetensors",
46741
- "model.layers.44.mlp.experts.90.down_proj.weight_scale": "model-00003-of-00004.safetensors",
46742
- "model.layers.44.mlp.experts.90.down_proj.weight_shape": "model-00003-of-00004.safetensors",
46743
- "model.layers.44.mlp.experts.90.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
46744
- "model.layers.44.mlp.experts.90.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
46745
- "model.layers.44.mlp.experts.90.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
46746
- "model.layers.44.mlp.experts.90.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46747
- "model.layers.44.mlp.experts.90.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46748
- "model.layers.44.mlp.experts.90.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46749
- "model.layers.44.mlp.experts.91.down_proj.weight_packed": "model-00003-of-00004.safetensors",
46750
- "model.layers.44.mlp.experts.91.down_proj.weight_scale": "model-00003-of-00004.safetensors",
46751
- "model.layers.44.mlp.experts.91.down_proj.weight_shape": "model-00003-of-00004.safetensors",
46752
- "model.layers.44.mlp.experts.91.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
46753
- "model.layers.44.mlp.experts.91.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
46754
- "model.layers.44.mlp.experts.91.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
46755
- "model.layers.44.mlp.experts.91.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46756
- "model.layers.44.mlp.experts.91.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46757
- "model.layers.44.mlp.experts.91.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46758
- "model.layers.44.mlp.experts.92.down_proj.weight_packed": "model-00003-of-00004.safetensors",
46759
- "model.layers.44.mlp.experts.92.down_proj.weight_scale": "model-00003-of-00004.safetensors",
46760
- "model.layers.44.mlp.experts.92.down_proj.weight_shape": "model-00003-of-00004.safetensors",
46761
- "model.layers.44.mlp.experts.92.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
46762
- "model.layers.44.mlp.experts.92.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
46763
- "model.layers.44.mlp.experts.92.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
46764
- "model.layers.44.mlp.experts.92.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46765
- "model.layers.44.mlp.experts.92.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46766
- "model.layers.44.mlp.experts.92.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46767
- "model.layers.44.mlp.experts.93.down_proj.weight_packed": "model-00003-of-00004.safetensors",
46768
- "model.layers.44.mlp.experts.93.down_proj.weight_scale": "model-00003-of-00004.safetensors",
46769
- "model.layers.44.mlp.experts.93.down_proj.weight_shape": "model-00003-of-00004.safetensors",
46770
- "model.layers.44.mlp.experts.93.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
46771
- "model.layers.44.mlp.experts.93.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
46772
- "model.layers.44.mlp.experts.93.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
46773
- "model.layers.44.mlp.experts.93.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46774
- "model.layers.44.mlp.experts.93.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46775
- "model.layers.44.mlp.experts.93.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46776
  "model.layers.44.mlp.experts.94.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46777
  "model.layers.44.mlp.experts.94.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46778
  "model.layers.44.mlp.experts.94.down_proj.weight_shape": "model-00004-of-00004.safetensors",
46779
- "model.layers.44.mlp.experts.94.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
46780
- "model.layers.44.mlp.experts.94.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
46781
- "model.layers.44.mlp.experts.94.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
46782
  "model.layers.44.mlp.experts.94.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46783
  "model.layers.44.mlp.experts.94.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46784
- "model.layers.44.mlp.experts.94.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46785
  "model.layers.44.mlp.experts.95.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46786
  "model.layers.44.mlp.experts.95.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46787
  "model.layers.44.mlp.experts.95.down_proj.weight_shape": "model-00004-of-00004.safetensors",
@@ -46827,9 +46749,7 @@
46827
  "model.layers.44.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46828
  "model.layers.44.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46829
  "model.layers.44.mlp.experts.99.up_proj.weight_shape": "model-00004-of-00004.safetensors",
46830
- "model.layers.44.mlp.gate.weight_packed": "model-00003-of-00004.safetensors",
46831
- "model.layers.44.mlp.gate.weight_scale": "model-00003-of-00004.safetensors",
46832
- "model.layers.44.mlp.gate.weight_shape": "model-00003-of-00004.safetensors",
46833
  "model.layers.44.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
46834
  "model.layers.44.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
46835
  "model.layers.44.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
@@ -47998,9 +47918,7 @@
47998
  "model.layers.45.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
47999
  "model.layers.45.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
48000
  "model.layers.45.mlp.experts.99.up_proj.weight_shape": "model-00004-of-00004.safetensors",
48001
- "model.layers.45.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
48002
- "model.layers.45.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
48003
- "model.layers.45.mlp.gate.weight_shape": "model-00004-of-00004.safetensors",
48004
  "model.layers.45.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
48005
  "model.layers.45.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
48006
  "model.layers.45.self_attn.k_proj.weight_packed": "model-00004-of-00004.safetensors",
@@ -49169,9 +49087,7 @@
49169
  "model.layers.46.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
49170
  "model.layers.46.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
49171
  "model.layers.46.mlp.experts.99.up_proj.weight_shape": "model-00004-of-00004.safetensors",
49172
- "model.layers.46.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
49173
- "model.layers.46.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
49174
- "model.layers.46.mlp.gate.weight_shape": "model-00004-of-00004.safetensors",
49175
  "model.layers.46.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
49176
  "model.layers.46.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
49177
  "model.layers.46.self_attn.k_proj.weight_packed": "model-00004-of-00004.safetensors",
@@ -50340,9 +50256,7 @@
50340
  "model.layers.47.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
50341
  "model.layers.47.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
50342
  "model.layers.47.mlp.experts.99.up_proj.weight_shape": "model-00004-of-00004.safetensors",
50343
- "model.layers.47.mlp.gate.weight_packed": "model-00004-of-00004.safetensors",
50344
- "model.layers.47.mlp.gate.weight_scale": "model-00004-of-00004.safetensors",
50345
- "model.layers.47.mlp.gate.weight_shape": "model-00004-of-00004.safetensors",
50346
  "model.layers.47.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
50347
  "model.layers.47.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
50348
  "model.layers.47.self_attn.k_proj.weight_packed": "model-00004-of-00004.safetensors",
@@ -51511,9 +51425,7 @@
51511
  "model.layers.5.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
51512
  "model.layers.5.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
51513
  "model.layers.5.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
51514
- "model.layers.5.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
51515
- "model.layers.5.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
51516
- "model.layers.5.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
51517
  "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
51518
  "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
51519
  "model.layers.5.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -52682,9 +52594,7 @@
52682
  "model.layers.6.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
52683
  "model.layers.6.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
52684
  "model.layers.6.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
52685
- "model.layers.6.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
52686
- "model.layers.6.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
52687
- "model.layers.6.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
52688
  "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
52689
  "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
52690
  "model.layers.6.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -53853,9 +53763,7 @@
53853
  "model.layers.7.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
53854
  "model.layers.7.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
53855
  "model.layers.7.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
53856
- "model.layers.7.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
53857
- "model.layers.7.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
53858
- "model.layers.7.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
53859
  "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
53860
  "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
53861
  "model.layers.7.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -55024,9 +54932,7 @@
55024
  "model.layers.8.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
55025
  "model.layers.8.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
55026
  "model.layers.8.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
55027
- "model.layers.8.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
55028
- "model.layers.8.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
55029
- "model.layers.8.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
55030
  "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
55031
  "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
55032
  "model.layers.8.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
@@ -56195,9 +56101,7 @@
56195
  "model.layers.9.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
56196
  "model.layers.9.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
56197
  "model.layers.9.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
56198
- "model.layers.9.mlp.gate.weight_packed": "model-00001-of-00004.safetensors",
56199
- "model.layers.9.mlp.gate.weight_scale": "model-00001-of-00004.safetensors",
56200
- "model.layers.9.mlp.gate.weight_shape": "model-00001-of-00004.safetensors",
56201
  "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
56202
  "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
56203
  "model.layers.9.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 16686185472
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00004-of-00004.safetensors",
 
1158
  "model.layers.0.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
1159
  "model.layers.0.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
1160
  "model.layers.0.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
1161
+ "model.layers.0.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
1162
  "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
1163
  "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
1164
  "model.layers.0.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
2327
  "model.layers.1.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
2328
  "model.layers.1.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
2329
  "model.layers.1.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
2330
+ "model.layers.1.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
2331
  "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
2332
  "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
2333
  "model.layers.1.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
3496
  "model.layers.10.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
3497
  "model.layers.10.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
3498
  "model.layers.10.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
3499
+ "model.layers.10.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
3500
  "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
3501
  "model.layers.10.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
3502
  "model.layers.10.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
4665
  "model.layers.11.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
4666
  "model.layers.11.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
4667
  "model.layers.11.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
4668
+ "model.layers.11.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
4669
  "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
4670
  "model.layers.11.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
4671
  "model.layers.11.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
5834
  "model.layers.12.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
5835
  "model.layers.12.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
5836
  "model.layers.12.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
5837
+ "model.layers.12.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
5838
  "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
5839
  "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
5840
  "model.layers.12.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
6769
  "model.layers.13.mlp.experts.75.up_proj.weight_packed": "model-00001-of-00004.safetensors",
6770
  "model.layers.13.mlp.experts.75.up_proj.weight_scale": "model-00001-of-00004.safetensors",
6771
  "model.layers.13.mlp.experts.75.up_proj.weight_shape": "model-00001-of-00004.safetensors",
6772
+ "model.layers.13.mlp.experts.76.down_proj.weight_packed": "model-00002-of-00004.safetensors",
6773
+ "model.layers.13.mlp.experts.76.down_proj.weight_scale": "model-00002-of-00004.safetensors",
6774
+ "model.layers.13.mlp.experts.76.down_proj.weight_shape": "model-00002-of-00004.safetensors",
6775
+ "model.layers.13.mlp.experts.76.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
6776
+ "model.layers.13.mlp.experts.76.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
6777
  "model.layers.13.mlp.experts.76.gate_proj.weight_shape": "model-00001-of-00004.safetensors",
6778
+ "model.layers.13.mlp.experts.76.up_proj.weight_packed": "model-00002-of-00004.safetensors",
6779
+ "model.layers.13.mlp.experts.76.up_proj.weight_scale": "model-00002-of-00004.safetensors",
6780
+ "model.layers.13.mlp.experts.76.up_proj.weight_shape": "model-00002-of-00004.safetensors",
6781
+ "model.layers.13.mlp.experts.77.down_proj.weight_packed": "model-00002-of-00004.safetensors",
6782
+ "model.layers.13.mlp.experts.77.down_proj.weight_scale": "model-00002-of-00004.safetensors",
6783
+ "model.layers.13.mlp.experts.77.down_proj.weight_shape": "model-00002-of-00004.safetensors",
6784
+ "model.layers.13.mlp.experts.77.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
6785
+ "model.layers.13.mlp.experts.77.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
6786
+ "model.layers.13.mlp.experts.77.gate_proj.weight_shape": "model-00002-of-00004.safetensors",
6787
+ "model.layers.13.mlp.experts.77.up_proj.weight_packed": "model-00002-of-00004.safetensors",
6788
+ "model.layers.13.mlp.experts.77.up_proj.weight_scale": "model-00002-of-00004.safetensors",
6789
+ "model.layers.13.mlp.experts.77.up_proj.weight_shape": "model-00002-of-00004.safetensors",
6790
  "model.layers.13.mlp.experts.78.down_proj.weight_packed": "model-00002-of-00004.safetensors",
6791
  "model.layers.13.mlp.experts.78.down_proj.weight_scale": "model-00002-of-00004.safetensors",
6792
  "model.layers.13.mlp.experts.78.down_proj.weight_shape": "model-00002-of-00004.safetensors",
6793
+ "model.layers.13.mlp.experts.78.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
6794
+ "model.layers.13.mlp.experts.78.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
6795
+ "model.layers.13.mlp.experts.78.gate_proj.weight_shape": "model-00002-of-00004.safetensors",
6796
  "model.layers.13.mlp.experts.78.up_proj.weight_packed": "model-00002-of-00004.safetensors",
6797
  "model.layers.13.mlp.experts.78.up_proj.weight_scale": "model-00002-of-00004.safetensors",
6798
+ "model.layers.13.mlp.experts.78.up_proj.weight_shape": "model-00002-of-00004.safetensors",
6799
  "model.layers.13.mlp.experts.79.down_proj.weight_packed": "model-00002-of-00004.safetensors",
6800
  "model.layers.13.mlp.experts.79.down_proj.weight_scale": "model-00002-of-00004.safetensors",
6801
  "model.layers.13.mlp.experts.79.down_proj.weight_shape": "model-00002-of-00004.safetensors",
 
7003
  "model.layers.13.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
7004
  "model.layers.13.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
7005
  "model.layers.13.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
7006
+ "model.layers.13.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
7007
  "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
7008
  "model.layers.13.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
7009
  "model.layers.13.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
8172
  "model.layers.14.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
8173
  "model.layers.14.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
8174
  "model.layers.14.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
8175
+ "model.layers.14.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
8176
  "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
8177
  "model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
8178
  "model.layers.14.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
9341
  "model.layers.15.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
9342
  "model.layers.15.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
9343
  "model.layers.15.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
9344
+ "model.layers.15.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
9345
  "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
9346
  "model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
9347
  "model.layers.15.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
10510
  "model.layers.16.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
10511
  "model.layers.16.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
10512
  "model.layers.16.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
10513
+ "model.layers.16.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
10514
  "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
10515
  "model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
10516
  "model.layers.16.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
11679
  "model.layers.17.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
11680
  "model.layers.17.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
11681
  "model.layers.17.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
11682
+ "model.layers.17.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
11683
  "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
11684
  "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
11685
  "model.layers.17.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
12848
  "model.layers.18.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
12849
  "model.layers.18.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
12850
  "model.layers.18.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
12851
+ "model.layers.18.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
12852
  "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
12853
  "model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
12854
  "model.layers.18.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
14017
  "model.layers.19.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
14018
  "model.layers.19.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
14019
  "model.layers.19.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
14020
+ "model.layers.19.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
14021
  "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
14022
  "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
14023
  "model.layers.19.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
15186
  "model.layers.2.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
15187
  "model.layers.2.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
15188
  "model.layers.2.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
15189
+ "model.layers.2.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
15190
  "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
15191
  "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
15192
  "model.layers.2.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
16355
  "model.layers.20.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
16356
  "model.layers.20.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
16357
  "model.layers.20.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
16358
+ "model.layers.20.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
16359
  "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
16360
  "model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
16361
  "model.layers.20.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
17524
  "model.layers.21.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
17525
  "model.layers.21.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
17526
  "model.layers.21.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
17527
+ "model.layers.21.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
17528
  "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
17529
  "model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
17530
  "model.layers.21.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
18693
  "model.layers.22.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
18694
  "model.layers.22.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
18695
  "model.layers.22.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
18696
+ "model.layers.22.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
18697
  "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
18698
  "model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
18699
  "model.layers.22.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
19862
  "model.layers.23.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
19863
  "model.layers.23.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
19864
  "model.layers.23.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
19865
+ "model.layers.23.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
19866
  "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
19867
  "model.layers.23.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
19868
  "model.layers.23.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
21031
  "model.layers.24.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
21032
  "model.layers.24.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
21033
  "model.layers.24.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
21034
+ "model.layers.24.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
21035
  "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
21036
  "model.layers.24.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
21037
  "model.layers.24.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
22200
  "model.layers.25.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
22201
  "model.layers.25.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
22202
  "model.layers.25.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
22203
+ "model.layers.25.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
22204
  "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
22205
  "model.layers.25.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
22206
  "model.layers.25.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
23369
  "model.layers.26.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
23370
  "model.layers.26.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
23371
  "model.layers.26.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
23372
+ "model.layers.26.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
23373
  "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
23374
  "model.layers.26.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
23375
  "model.layers.26.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
24538
  "model.layers.27.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
24539
  "model.layers.27.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
24540
  "model.layers.27.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
24541
+ "model.layers.27.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
24542
  "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
24543
  "model.layers.27.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
24544
  "model.layers.27.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
25707
  "model.layers.28.mlp.experts.99.up_proj.weight_packed": "model-00002-of-00004.safetensors",
25708
  "model.layers.28.mlp.experts.99.up_proj.weight_scale": "model-00002-of-00004.safetensors",
25709
  "model.layers.28.mlp.experts.99.up_proj.weight_shape": "model-00002-of-00004.safetensors",
25710
+ "model.layers.28.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
25711
  "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
25712
  "model.layers.28.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
25713
  "model.layers.28.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
26039
  "model.layers.29.mlp.experts.14.up_proj.weight_packed": "model-00002-of-00004.safetensors",
26040
  "model.layers.29.mlp.experts.14.up_proj.weight_scale": "model-00002-of-00004.safetensors",
26041
  "model.layers.29.mlp.experts.14.up_proj.weight_shape": "model-00002-of-00004.safetensors",
26042
+ "model.layers.29.mlp.experts.15.down_proj.weight_packed": "model-00003-of-00004.safetensors",
26043
+ "model.layers.29.mlp.experts.15.down_proj.weight_scale": "model-00003-of-00004.safetensors",
26044
+ "model.layers.29.mlp.experts.15.down_proj.weight_shape": "model-00003-of-00004.safetensors",
26045
  "model.layers.29.mlp.experts.15.gate_proj.weight_packed": "model-00002-of-00004.safetensors",
26046
  "model.layers.29.mlp.experts.15.gate_proj.weight_scale": "model-00002-of-00004.safetensors",
26047
  "model.layers.29.mlp.experts.15.gate_proj.weight_shape": "model-00002-of-00004.safetensors",
26048
+ "model.layers.29.mlp.experts.15.up_proj.weight_packed": "model-00003-of-00004.safetensors",
26049
+ "model.layers.29.mlp.experts.15.up_proj.weight_scale": "model-00003-of-00004.safetensors",
26050
  "model.layers.29.mlp.experts.15.up_proj.weight_shape": "model-00002-of-00004.safetensors",
26051
+ "model.layers.29.mlp.experts.16.down_proj.weight_packed": "model-00003-of-00004.safetensors",
26052
+ "model.layers.29.mlp.experts.16.down_proj.weight_scale": "model-00003-of-00004.safetensors",
26053
+ "model.layers.29.mlp.experts.16.down_proj.weight_shape": "model-00003-of-00004.safetensors",
26054
+ "model.layers.29.mlp.experts.16.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
26055
+ "model.layers.29.mlp.experts.16.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
26056
+ "model.layers.29.mlp.experts.16.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
26057
+ "model.layers.29.mlp.experts.16.up_proj.weight_packed": "model-00003-of-00004.safetensors",
26058
+ "model.layers.29.mlp.experts.16.up_proj.weight_scale": "model-00003-of-00004.safetensors",
26059
+ "model.layers.29.mlp.experts.16.up_proj.weight_shape": "model-00003-of-00004.safetensors",
26060
+ "model.layers.29.mlp.experts.17.down_proj.weight_packed": "model-00003-of-00004.safetensors",
26061
+ "model.layers.29.mlp.experts.17.down_proj.weight_scale": "model-00003-of-00004.safetensors",
26062
+ "model.layers.29.mlp.experts.17.down_proj.weight_shape": "model-00003-of-00004.safetensors",
26063
+ "model.layers.29.mlp.experts.17.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
26064
+ "model.layers.29.mlp.experts.17.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
26065
+ "model.layers.29.mlp.experts.17.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
26066
+ "model.layers.29.mlp.experts.17.up_proj.weight_packed": "model-00003-of-00004.safetensors",
26067
+ "model.layers.29.mlp.experts.17.up_proj.weight_scale": "model-00003-of-00004.safetensors",
26068
+ "model.layers.29.mlp.experts.17.up_proj.weight_shape": "model-00003-of-00004.safetensors",
26069
+ "model.layers.29.mlp.experts.18.down_proj.weight_packed": "model-00003-of-00004.safetensors",
26070
+ "model.layers.29.mlp.experts.18.down_proj.weight_scale": "model-00003-of-00004.safetensors",
26071
+ "model.layers.29.mlp.experts.18.down_proj.weight_shape": "model-00003-of-00004.safetensors",
26072
+ "model.layers.29.mlp.experts.18.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
26073
+ "model.layers.29.mlp.experts.18.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
26074
+ "model.layers.29.mlp.experts.18.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
26075
+ "model.layers.29.mlp.experts.18.up_proj.weight_packed": "model-00003-of-00004.safetensors",
26076
+ "model.layers.29.mlp.experts.18.up_proj.weight_scale": "model-00003-of-00004.safetensors",
26077
+ "model.layers.29.mlp.experts.18.up_proj.weight_shape": "model-00003-of-00004.safetensors",
26078
+ "model.layers.29.mlp.experts.19.down_proj.weight_packed": "model-00003-of-00004.safetensors",
26079
+ "model.layers.29.mlp.experts.19.down_proj.weight_scale": "model-00003-of-00004.safetensors",
26080
+ "model.layers.29.mlp.experts.19.down_proj.weight_shape": "model-00003-of-00004.safetensors",
26081
+ "model.layers.29.mlp.experts.19.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
26082
+ "model.layers.29.mlp.experts.19.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
26083
+ "model.layers.29.mlp.experts.19.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
26084
+ "model.layers.29.mlp.experts.19.up_proj.weight_packed": "model-00003-of-00004.safetensors",
26085
+ "model.layers.29.mlp.experts.19.up_proj.weight_scale": "model-00003-of-00004.safetensors",
26086
+ "model.layers.29.mlp.experts.19.up_proj.weight_shape": "model-00003-of-00004.safetensors",
26087
  "model.layers.29.mlp.experts.2.down_proj.weight_packed": "model-00002-of-00004.safetensors",
26088
  "model.layers.29.mlp.experts.2.down_proj.weight_scale": "model-00002-of-00004.safetensors",
26089
  "model.layers.29.mlp.experts.2.down_proj.weight_shape": "model-00002-of-00004.safetensors",
 
26096
  "model.layers.29.mlp.experts.20.down_proj.weight_packed": "model-00003-of-00004.safetensors",
26097
  "model.layers.29.mlp.experts.20.down_proj.weight_scale": "model-00003-of-00004.safetensors",
26098
  "model.layers.29.mlp.experts.20.down_proj.weight_shape": "model-00003-of-00004.safetensors",
26099
+ "model.layers.29.mlp.experts.20.gate_proj.weight_packed": "model-00003-of-00004.safetensors",
26100
+ "model.layers.29.mlp.experts.20.gate_proj.weight_scale": "model-00003-of-00004.safetensors",
26101
+ "model.layers.29.mlp.experts.20.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
26102
  "model.layers.29.mlp.experts.20.up_proj.weight_packed": "model-00003-of-00004.safetensors",
26103
  "model.layers.29.mlp.experts.20.up_proj.weight_scale": "model-00003-of-00004.safetensors",
26104
+ "model.layers.29.mlp.experts.20.up_proj.weight_shape": "model-00003-of-00004.safetensors",
26105
  "model.layers.29.mlp.experts.21.down_proj.weight_packed": "model-00003-of-00004.safetensors",
26106
  "model.layers.29.mlp.experts.21.down_proj.weight_scale": "model-00003-of-00004.safetensors",
26107
  "model.layers.29.mlp.experts.21.down_proj.weight_shape": "model-00003-of-00004.safetensors",
 
26876
  "model.layers.29.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
26877
  "model.layers.29.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
26878
  "model.layers.29.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
26879
+ "model.layers.29.mlp.gate.weight": "model-00002-of-00004.safetensors",
 
 
26880
  "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
26881
  "model.layers.29.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
26882
  "model.layers.29.self_attn.k_proj.weight_packed": "model-00002-of-00004.safetensors",
 
28045
  "model.layers.3.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
28046
  "model.layers.3.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
28047
  "model.layers.3.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
28048
+ "model.layers.3.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
28049
  "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
28050
  "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
28051
  "model.layers.3.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
29214
  "model.layers.30.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
29215
  "model.layers.30.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
29216
  "model.layers.30.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
29217
+ "model.layers.30.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
29218
  "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
29219
  "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
29220
  "model.layers.30.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
30383
  "model.layers.31.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
30384
  "model.layers.31.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
30385
  "model.layers.31.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
30386
+ "model.layers.31.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
30387
  "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
30388
  "model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
30389
  "model.layers.31.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
31552
  "model.layers.32.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
31553
  "model.layers.32.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
31554
  "model.layers.32.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
31555
+ "model.layers.32.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
31556
  "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
31557
  "model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
31558
  "model.layers.32.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
32721
  "model.layers.33.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
32722
  "model.layers.33.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
32723
  "model.layers.33.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
32724
+ "model.layers.33.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
32725
  "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
32726
  "model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
32727
  "model.layers.33.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
33890
  "model.layers.34.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
33891
  "model.layers.34.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
33892
  "model.layers.34.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
33893
+ "model.layers.34.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
33894
  "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
33895
  "model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
33896
  "model.layers.34.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
35059
  "model.layers.35.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
35060
  "model.layers.35.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
35061
  "model.layers.35.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
35062
+ "model.layers.35.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
35063
  "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
35064
  "model.layers.35.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
35065
  "model.layers.35.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
36228
  "model.layers.36.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
36229
  "model.layers.36.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
36230
  "model.layers.36.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
36231
+ "model.layers.36.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
36232
  "model.layers.36.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
36233
  "model.layers.36.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
36234
  "model.layers.36.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
37397
  "model.layers.37.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
37398
  "model.layers.37.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
37399
  "model.layers.37.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
37400
+ "model.layers.37.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
37401
  "model.layers.37.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
37402
  "model.layers.37.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
37403
  "model.layers.37.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
38566
  "model.layers.38.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
38567
  "model.layers.38.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
38568
  "model.layers.38.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
38569
+ "model.layers.38.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
38570
  "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
38571
  "model.layers.38.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
38572
  "model.layers.38.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
39735
  "model.layers.39.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
39736
  "model.layers.39.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
39737
  "model.layers.39.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
39738
+ "model.layers.39.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
39739
  "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
39740
  "model.layers.39.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
39741
  "model.layers.39.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
40904
  "model.layers.4.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
40905
  "model.layers.4.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
40906
  "model.layers.4.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
40907
+ "model.layers.4.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
40908
  "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
40909
  "model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
40910
  "model.layers.4.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
42073
  "model.layers.40.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
42074
  "model.layers.40.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
42075
  "model.layers.40.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
42076
+ "model.layers.40.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
42077
  "model.layers.40.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
42078
  "model.layers.40.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
42079
  "model.layers.40.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
43242
  "model.layers.41.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
43243
  "model.layers.41.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
43244
  "model.layers.41.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
43245
+ "model.layers.41.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
43246
  "model.layers.41.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
43247
  "model.layers.41.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
43248
  "model.layers.41.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
44411
  "model.layers.42.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
44412
  "model.layers.42.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
44413
  "model.layers.42.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
44414
+ "model.layers.42.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
44415
  "model.layers.42.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
44416
  "model.layers.42.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
44417
  "model.layers.42.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
45580
  "model.layers.43.mlp.experts.99.up_proj.weight_packed": "model-00003-of-00004.safetensors",
45581
  "model.layers.43.mlp.experts.99.up_proj.weight_scale": "model-00003-of-00004.safetensors",
45582
  "model.layers.43.mlp.experts.99.up_proj.weight_shape": "model-00003-of-00004.safetensors",
45583
+ "model.layers.43.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
45584
  "model.layers.43.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
45585
  "model.layers.43.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
45586
  "model.layers.43.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
46623
  "model.layers.44.mlp.experts.86.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46624
  "model.layers.44.mlp.experts.86.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46625
  "model.layers.44.mlp.experts.86.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46626
+ "model.layers.44.mlp.experts.87.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46627
+ "model.layers.44.mlp.experts.87.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46628
+ "model.layers.44.mlp.experts.87.down_proj.weight_shape": "model-00004-of-00004.safetensors",
46629
+ "model.layers.44.mlp.experts.87.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
46630
+ "model.layers.44.mlp.experts.87.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
46631
  "model.layers.44.mlp.experts.87.gate_proj.weight_shape": "model-00003-of-00004.safetensors",
46632
+ "model.layers.44.mlp.experts.87.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46633
+ "model.layers.44.mlp.experts.87.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46634
+ "model.layers.44.mlp.experts.87.up_proj.weight_shape": "model-00004-of-00004.safetensors",
46635
+ "model.layers.44.mlp.experts.88.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46636
+ "model.layers.44.mlp.experts.88.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46637
+ "model.layers.44.mlp.experts.88.down_proj.weight_shape": "model-00004-of-00004.safetensors",
46638
+ "model.layers.44.mlp.experts.88.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
46639
+ "model.layers.44.mlp.experts.88.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
46640
+ "model.layers.44.mlp.experts.88.gate_proj.weight_shape": "model-00004-of-00004.safetensors",
46641
+ "model.layers.44.mlp.experts.88.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46642
+ "model.layers.44.mlp.experts.88.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46643
+ "model.layers.44.mlp.experts.88.up_proj.weight_shape": "model-00004-of-00004.safetensors",
46644
+ "model.layers.44.mlp.experts.89.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46645
+ "model.layers.44.mlp.experts.89.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46646
+ "model.layers.44.mlp.experts.89.down_proj.weight_shape": "model-00004-of-00004.safetensors",
46647
+ "model.layers.44.mlp.experts.89.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
46648
+ "model.layers.44.mlp.experts.89.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
46649
+ "model.layers.44.mlp.experts.89.gate_proj.weight_shape": "model-00004-of-00004.safetensors",
46650
+ "model.layers.44.mlp.experts.89.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46651
+ "model.layers.44.mlp.experts.89.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46652
+ "model.layers.44.mlp.experts.89.up_proj.weight_shape": "model-00004-of-00004.safetensors",
46653
  "model.layers.44.mlp.experts.9.down_proj.weight_packed": "model-00003-of-00004.safetensors",
46654
  "model.layers.44.mlp.experts.9.down_proj.weight_scale": "model-00003-of-00004.safetensors",
46655
  "model.layers.44.mlp.experts.9.down_proj.weight_shape": "model-00003-of-00004.safetensors",
 
46659
  "model.layers.44.mlp.experts.9.up_proj.weight_packed": "model-00003-of-00004.safetensors",
46660
  "model.layers.44.mlp.experts.9.up_proj.weight_scale": "model-00003-of-00004.safetensors",
46661
  "model.layers.44.mlp.experts.9.up_proj.weight_shape": "model-00003-of-00004.safetensors",
46662
+ "model.layers.44.mlp.experts.90.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46663
+ "model.layers.44.mlp.experts.90.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46664
+ "model.layers.44.mlp.experts.90.down_proj.weight_shape": "model-00004-of-00004.safetensors",
46665
+ "model.layers.44.mlp.experts.90.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
46666
+ "model.layers.44.mlp.experts.90.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
46667
+ "model.layers.44.mlp.experts.90.gate_proj.weight_shape": "model-00004-of-00004.safetensors",
46668
+ "model.layers.44.mlp.experts.90.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46669
+ "model.layers.44.mlp.experts.90.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46670
+ "model.layers.44.mlp.experts.90.up_proj.weight_shape": "model-00004-of-00004.safetensors",
46671
+ "model.layers.44.mlp.experts.91.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46672
+ "model.layers.44.mlp.experts.91.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46673
+ "model.layers.44.mlp.experts.91.down_proj.weight_shape": "model-00004-of-00004.safetensors",
46674
+ "model.layers.44.mlp.experts.91.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
46675
+ "model.layers.44.mlp.experts.91.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
46676
+ "model.layers.44.mlp.experts.91.gate_proj.weight_shape": "model-00004-of-00004.safetensors",
46677
+ "model.layers.44.mlp.experts.91.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46678
+ "model.layers.44.mlp.experts.91.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46679
+ "model.layers.44.mlp.experts.91.up_proj.weight_shape": "model-00004-of-00004.safetensors",
46680
+ "model.layers.44.mlp.experts.92.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46681
+ "model.layers.44.mlp.experts.92.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46682
+ "model.layers.44.mlp.experts.92.down_proj.weight_shape": "model-00004-of-00004.safetensors",
46683
+ "model.layers.44.mlp.experts.92.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
46684
+ "model.layers.44.mlp.experts.92.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
46685
+ "model.layers.44.mlp.experts.92.gate_proj.weight_shape": "model-00004-of-00004.safetensors",
46686
+ "model.layers.44.mlp.experts.92.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46687
+ "model.layers.44.mlp.experts.92.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46688
+ "model.layers.44.mlp.experts.92.up_proj.weight_shape": "model-00004-of-00004.safetensors",
46689
+ "model.layers.44.mlp.experts.93.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46690
+ "model.layers.44.mlp.experts.93.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46691
+ "model.layers.44.mlp.experts.93.down_proj.weight_shape": "model-00004-of-00004.safetensors",
46692
+ "model.layers.44.mlp.experts.93.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
46693
+ "model.layers.44.mlp.experts.93.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
46694
+ "model.layers.44.mlp.experts.93.gate_proj.weight_shape": "model-00004-of-00004.safetensors",
46695
+ "model.layers.44.mlp.experts.93.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46696
+ "model.layers.44.mlp.experts.93.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46697
+ "model.layers.44.mlp.experts.93.up_proj.weight_shape": "model-00004-of-00004.safetensors",
46698
  "model.layers.44.mlp.experts.94.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46699
  "model.layers.44.mlp.experts.94.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46700
  "model.layers.44.mlp.experts.94.down_proj.weight_shape": "model-00004-of-00004.safetensors",
46701
+ "model.layers.44.mlp.experts.94.gate_proj.weight_packed": "model-00004-of-00004.safetensors",
46702
+ "model.layers.44.mlp.experts.94.gate_proj.weight_scale": "model-00004-of-00004.safetensors",
46703
+ "model.layers.44.mlp.experts.94.gate_proj.weight_shape": "model-00004-of-00004.safetensors",
46704
  "model.layers.44.mlp.experts.94.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46705
  "model.layers.44.mlp.experts.94.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46706
+ "model.layers.44.mlp.experts.94.up_proj.weight_shape": "model-00004-of-00004.safetensors",
46707
  "model.layers.44.mlp.experts.95.down_proj.weight_packed": "model-00004-of-00004.safetensors",
46708
  "model.layers.44.mlp.experts.95.down_proj.weight_scale": "model-00004-of-00004.safetensors",
46709
  "model.layers.44.mlp.experts.95.down_proj.weight_shape": "model-00004-of-00004.safetensors",
 
46749
  "model.layers.44.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
46750
  "model.layers.44.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
46751
  "model.layers.44.mlp.experts.99.up_proj.weight_shape": "model-00004-of-00004.safetensors",
46752
+ "model.layers.44.mlp.gate.weight": "model-00003-of-00004.safetensors",
 
 
46753
  "model.layers.44.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
46754
  "model.layers.44.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
46755
  "model.layers.44.self_attn.k_proj.weight_packed": "model-00003-of-00004.safetensors",
 
47918
  "model.layers.45.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
47919
  "model.layers.45.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
47920
  "model.layers.45.mlp.experts.99.up_proj.weight_shape": "model-00004-of-00004.safetensors",
47921
+ "model.layers.45.mlp.gate.weight": "model-00004-of-00004.safetensors",
 
 
47922
  "model.layers.45.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
47923
  "model.layers.45.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
47924
  "model.layers.45.self_attn.k_proj.weight_packed": "model-00004-of-00004.safetensors",
 
49087
  "model.layers.46.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
49088
  "model.layers.46.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
49089
  "model.layers.46.mlp.experts.99.up_proj.weight_shape": "model-00004-of-00004.safetensors",
49090
+ "model.layers.46.mlp.gate.weight": "model-00004-of-00004.safetensors",
 
 
49091
  "model.layers.46.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
49092
  "model.layers.46.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
49093
  "model.layers.46.self_attn.k_proj.weight_packed": "model-00004-of-00004.safetensors",
 
50256
  "model.layers.47.mlp.experts.99.up_proj.weight_packed": "model-00004-of-00004.safetensors",
50257
  "model.layers.47.mlp.experts.99.up_proj.weight_scale": "model-00004-of-00004.safetensors",
50258
  "model.layers.47.mlp.experts.99.up_proj.weight_shape": "model-00004-of-00004.safetensors",
50259
+ "model.layers.47.mlp.gate.weight": "model-00004-of-00004.safetensors",
 
 
50260
  "model.layers.47.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
50261
  "model.layers.47.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
50262
  "model.layers.47.self_attn.k_proj.weight_packed": "model-00004-of-00004.safetensors",
 
51425
  "model.layers.5.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
51426
  "model.layers.5.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
51427
  "model.layers.5.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
51428
+ "model.layers.5.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
51429
  "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
51430
  "model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
51431
  "model.layers.5.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
52594
  "model.layers.6.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
52595
  "model.layers.6.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
52596
  "model.layers.6.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
52597
+ "model.layers.6.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
52598
  "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
52599
  "model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
52600
  "model.layers.6.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
53763
  "model.layers.7.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
53764
  "model.layers.7.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
53765
  "model.layers.7.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
53766
+ "model.layers.7.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
53767
  "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
53768
  "model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
53769
  "model.layers.7.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
54932
  "model.layers.8.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
54933
  "model.layers.8.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
54934
  "model.layers.8.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
54935
+ "model.layers.8.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
54936
  "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
54937
  "model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
54938
  "model.layers.8.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
 
56101
  "model.layers.9.mlp.experts.99.up_proj.weight_packed": "model-00001-of-00004.safetensors",
56102
  "model.layers.9.mlp.experts.99.up_proj.weight_scale": "model-00001-of-00004.safetensors",
56103
  "model.layers.9.mlp.experts.99.up_proj.weight_shape": "model-00001-of-00004.safetensors",
56104
+ "model.layers.9.mlp.gate.weight": "model-00001-of-00004.safetensors",
 
 
56105
  "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
56106
  "model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
56107
  "model.layers.9.self_attn.k_proj.weight_packed": "model-00001-of-00004.safetensors",
recipe.yaml CHANGED
@@ -1,6 +1,6 @@
1
  default_stage:
2
  default_modifiers:
3
  QuantizationModifier:
4
- ignore: [lm_head]
5
  targets: [Linear]
6
  scheme: W4A16
 
1
  default_stage:
2
  default_modifiers:
3
  QuantizationModifier:
4
+ ignore: [lm_head, 're:.*mlp.gate$', 're:.*mlp.shared_expert_gate$']
5
  targets: [Linear]
6
  scheme: W4A16
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:574de68a0f63f2004784a421c7d42c2b2786c05cb38542d2ed3525757a1f7fde
3
- size 11422932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654