Multiple quants for MLX framework

#1

This view is limited to 50 files because the pull request contains too many changes; see the raw diff for the full change set.

Files changed (50)
  1. .DS_Store +0 -0
  2. .gitattributes +0 -11
  3. DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/config.json +0 -1790
  4. DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/model.safetensors +0 -3
  5. DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/model.safetensors.index.json +0 -742
  6. DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/special_tokens_map.json +0 -23
  7. DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/tokenizer.json +0 -3
  8. DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/tokenizer_config.json +0 -195
  9. DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/config.json +0 -1790
  10. DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/model.safetensors +0 -3
  11. DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/model.safetensors.index.json +0 -742
  12. DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/special_tokens_map.json +0 -23
  13. DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/tokenizer.json +0 -3
  14. DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/tokenizer_config.json +0 -195
  15. DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/config.json +0 -1790
  16. DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/model.safetensors +0 -3
  17. DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/model.safetensors.index.json +0 -742
  18. DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/special_tokens_map.json +0 -23
  19. DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/tokenizer.json +0 -3
  20. DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/tokenizer_config.json +0 -195
  21. DeepSeek-R1-Distill-Qwen-7B-3bit/config.json +0 -36
  22. DeepSeek-R1-Distill-Qwen-7B-3bit/model.safetensors +0 -3
  23. DeepSeek-R1-Distill-Qwen-7B-3bit/model.safetensors.index.json +0 -742
  24. DeepSeek-R1-Distill-Qwen-7B-3bit/special_tokens_map.json +0 -23
  25. DeepSeek-R1-Distill-Qwen-7B-3bit/tokenizer.json +0 -3
  26. DeepSeek-R1-Distill-Qwen-7B-3bit/tokenizer_config.json +0 -195
  27. DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/config.json +0 -1790
  28. DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/model.safetensors +0 -3
  29. DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/model.safetensors.index.json +0 -742
  30. DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/special_tokens_map.json +0 -23
  31. DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/tokenizer.json +0 -3
  32. DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/tokenizer_config.json +0 -195
  33. DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/config.json +0 -1790
  34. DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/model.safetensors +0 -3
  35. DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/model.safetensors.index.json +0 -742
  36. DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/special_tokens_map.json +0 -23
  37. DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/tokenizer.json +0 -3
  38. DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/tokenizer_config.json +0 -195
  39. DeepSeek-R1-Distill-Qwen-7B-4bit/config.json +0 -36
  40. DeepSeek-R1-Distill-Qwen-7B-4bit/model.safetensors +0 -3
  41. DeepSeek-R1-Distill-Qwen-7B-4bit/model.safetensors.index.json +0 -742
  42. DeepSeek-R1-Distill-Qwen-7B-4bit/special_tokens_map.json +0 -23
  43. DeepSeek-R1-Distill-Qwen-7B-4bit/tokenizer.json +0 -3
  44. DeepSeek-R1-Distill-Qwen-7B-4bit/tokenizer_config.json +0 -195
  45. DeepSeek-R1-Distill-Qwen-7B-6bit/config.json +0 -36
  46. DeepSeek-R1-Distill-Qwen-7B-6bit/model-00001-of-00002.safetensors +0 -3
  47. DeepSeek-R1-Distill-Qwen-7B-6bit/model-00002-of-00002.safetensors +0 -3
  48. DeepSeek-R1-Distill-Qwen-7B-6bit/model.safetensors.index.json +0 -742
  49. DeepSeek-R1-Distill-Qwen-7B-6bit/special_tokens_map.json +0 -23
  50. DeepSeek-R1-Distill-Qwen-7B-6bit/tokenizer.json +0 -3
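
Each of the quant directories touched by this PR is a self-contained MLX checkpoint (a config.json carrying the quantization recipe, safetensors weights plus index, and tokenizer files), so any one of them can be run directly with mlx-lm. A minimal sketch, assuming mlx-lm is installed on Apple silicon and the folder is available locally; the path and prompt below are illustrative:

from mlx_lm import load, generate

# Point at one of the quant folders from this PR (illustrative local path).
model, tokenizer = load("DeepSeek-R1-Distill-Qwen-7B-4bit")

# Generate a short completion to sanity-check the quant.
print(generate(model, tokenizer, prompt="Explain MLX in one sentence.", max_tokens=64))
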
.DS_Store DELETED
Binary file (12.3 kB)
 
.gitattributes CHANGED
@@ -33,14 +33,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/tokenizer.json filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-4bit/tokenizer.json filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-bfloat16/tokenizer.json filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-8bit/tokenizer.json filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/tokenizer.json filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-6bit/tokenizer.json filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/tokenizer.json filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/tokenizer.json filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/tokenizer.json filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-3bit/tokenizer.json filter=lfs diff=lfs merge=lfs -text
- DeepSeek-R1-Distill-Qwen-7B-float16/tokenizer.json filter=lfs diff=lfs merge=lfs -text

DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/config.json DELETED
@@ -1,1790 +0,0 @@
- {
-   "architectures": [
-     "Qwen2ForCausalLM"
-   ],
-   "attention_dropout": 0.0,
-   "bos_token_id": 151643,
-   "eos_token_id": 151643,
-   "hidden_act": "silu",
-   "hidden_size": 3584,
-   "initializer_range": 0.02,
-   "intermediate_size": 18944,
-   "max_position_embeddings": 131072,
-   "max_window_layers": 28,
-   "model_type": "qwen2",
-   "num_attention_heads": 28,
-   "num_hidden_layers": 28,
-   "num_key_value_heads": 4,
-   "quantization": {
-     "group_size": 64,
-     "bits": null,
-     "model.embed_tokens": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.0.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.self_attn.rope": false,
-     "model.layers.0.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.0.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.input_layernorm": false,
-     "model.layers.0.post_attention_layernorm": false,
-     "model.layers.1.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.1.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.1.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.1.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.1.self_attn.rope": false,
-     "model.layers.1.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.1.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.1.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.1.input_layernorm": false,
-     "model.layers.1.post_attention_layernorm": false,
-     "model.layers.2.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.2.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.2.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.2.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.2.self_attn.rope": false,
-     "model.layers.2.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.2.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.2.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.2.input_layernorm": false,
-     "model.layers.2.post_attention_layernorm": false,
-     "model.layers.3.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.self_attn.rope": false,
-     "model.layers.3.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.input_layernorm": false,
-     "model.layers.3.post_attention_layernorm": false,
-     "model.layers.4.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.self_attn.rope": false,
-     "model.layers.4.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.input_layernorm": false,
-     "model.layers.4.post_attention_layernorm": false,
-     "model.layers.5.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.5.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.5.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.5.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.5.self_attn.rope": false,
-     "model.layers.5.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.5.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.5.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.5.input_layernorm": false,
-     "model.layers.5.post_attention_layernorm": false,
-     "model.layers.6.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.self_attn.rope": false,
-     "model.layers.6.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.input_layernorm": false,
-     "model.layers.6.post_attention_layernorm": false,
-     "model.layers.7.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.self_attn.rope": false,
-     "model.layers.7.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.input_layernorm": false,
-     "model.layers.7.post_attention_layernorm": false,
-     "model.layers.8.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.8.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.8.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.8.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.8.self_attn.rope": false,
-     "model.layers.8.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.8.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.8.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.8.input_layernorm": false,
-     "model.layers.8.post_attention_layernorm": false,
-     "model.layers.9.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.self_attn.rope": false,
-     "model.layers.9.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.input_layernorm": false,
-     "model.layers.9.post_attention_layernorm": false,
-     "model.layers.10.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.self_attn.rope": false,
-     "model.layers.10.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.input_layernorm": false,
-     "model.layers.10.post_attention_layernorm": false,
-     "model.layers.11.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.11.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.11.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.11.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.11.self_attn.rope": false,
-     "model.layers.11.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.11.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.11.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.11.input_layernorm": false,
-     "model.layers.11.post_attention_layernorm": false,
-     "model.layers.12.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.self_attn.rope": false,
-     "model.layers.12.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.input_layernorm": false,
-     "model.layers.12.post_attention_layernorm": false,
-     "model.layers.13.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.self_attn.rope": false,
-     "model.layers.13.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.input_layernorm": false,
-     "model.layers.13.post_attention_layernorm": false,
-     "model.layers.14.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.14.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.14.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.14.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.14.self_attn.rope": false,
-     "model.layers.14.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.14.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.14.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.14.input_layernorm": false,
-     "model.layers.14.post_attention_layernorm": false,
-     "model.layers.15.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.self_attn.rope": false,
-     "model.layers.15.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.input_layernorm": false,
-     "model.layers.15.post_attention_layernorm": false,
-     "model.layers.16.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.self_attn.rope": false,
-     "model.layers.16.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.input_layernorm": false,
-     "model.layers.16.post_attention_layernorm": false,
-     "model.layers.17.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.17.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.17.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.17.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.17.self_attn.rope": false,
-     "model.layers.17.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.17.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.17.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.17.input_layernorm": false,
-     "model.layers.17.post_attention_layernorm": false,
-     "model.layers.18.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.self_attn.rope": false,
-     "model.layers.18.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.input_layernorm": false,
-     "model.layers.18.post_attention_layernorm": false,
-     "model.layers.19.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.self_attn.rope": false,
-     "model.layers.19.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.input_layernorm": false,
-     "model.layers.19.post_attention_layernorm": false,
-     "model.layers.20.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.20.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.20.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.20.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.20.self_attn.rope": false,
-     "model.layers.20.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.20.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.20.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.20.input_layernorm": false,
-     "model.layers.20.post_attention_layernorm": false,
-     "model.layers.21.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.self_attn.rope": false,
-     "model.layers.21.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.input_layernorm": false,
-     "model.layers.21.post_attention_layernorm": false,
-     "model.layers.22.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.self_attn.rope": false,
-     "model.layers.22.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.input_layernorm": false,
-     "model.layers.22.post_attention_layernorm": false,
-     "model.layers.23.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.23.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.23.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.23.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.23.self_attn.rope": false,
-     "model.layers.23.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.23.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.23.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.23.input_layernorm": false,
-     "model.layers.23.post_attention_layernorm": false,
-     "model.layers.24.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.24.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.24.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.24.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.24.self_attn.rope": false,
-     "model.layers.24.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.24.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.24.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.24.input_layernorm": false,
-     "model.layers.24.post_attention_layernorm": false,
-     "model.layers.25.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.25.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.25.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.25.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.25.self_attn.rope": false,
-     "model.layers.25.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.25.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.25.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.25.input_layernorm": false,
-     "model.layers.25.post_attention_layernorm": false,
-     "model.layers.26.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.26.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.26.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.26.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.26.self_attn.rope": false,
-     "model.layers.26.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.26.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.26.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.26.input_layernorm": false,
-     "model.layers.26.post_attention_layernorm": false,
-     "model.layers.27.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.27.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.27.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.27.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.27.self_attn.rope": false,
-     "model.layers.27.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.27.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.27.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.27.input_layernorm": false,
-     "model.layers.27.post_attention_layernorm": false,
-     "model.norm": false,
-     "lm_head": {
-       "group_size": 64,
-       "bits": 4
-     }
-   },
-   "quantization_config": {
-     "group_size": 64,
-     "bits": null,
-     "model.embed_tokens": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.0.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.self_attn.rope": false,
-     "model.layers.0.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.0.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.0.input_layernorm": false,
-     "model.layers.0.post_attention_layernorm": false,
-     "model.layers.1.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.1.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.1.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.1.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.1.self_attn.rope": false,
-     "model.layers.1.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.1.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.1.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.1.input_layernorm": false,
-     "model.layers.1.post_attention_layernorm": false,
-     "model.layers.2.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.2.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.2.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.2.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.2.self_attn.rope": false,
-     "model.layers.2.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.2.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.2.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.2.input_layernorm": false,
-     "model.layers.2.post_attention_layernorm": false,
-     "model.layers.3.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.self_attn.rope": false,
-     "model.layers.3.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.3.input_layernorm": false,
-     "model.layers.3.post_attention_layernorm": false,
-     "model.layers.4.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.self_attn.rope": false,
-     "model.layers.4.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.4.input_layernorm": false,
-     "model.layers.4.post_attention_layernorm": false,
-     "model.layers.5.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.5.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.5.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.5.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.5.self_attn.rope": false,
-     "model.layers.5.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.5.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.5.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.5.input_layernorm": false,
-     "model.layers.5.post_attention_layernorm": false,
-     "model.layers.6.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.self_attn.rope": false,
-     "model.layers.6.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.6.input_layernorm": false,
-     "model.layers.6.post_attention_layernorm": false,
-     "model.layers.7.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.self_attn.rope": false,
-     "model.layers.7.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.7.input_layernorm": false,
-     "model.layers.7.post_attention_layernorm": false,
-     "model.layers.8.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.8.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.8.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.8.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.8.self_attn.rope": false,
-     "model.layers.8.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.8.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.8.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.8.input_layernorm": false,
-     "model.layers.8.post_attention_layernorm": false,
-     "model.layers.9.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.self_attn.rope": false,
-     "model.layers.9.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.9.input_layernorm": false,
-     "model.layers.9.post_attention_layernorm": false,
-     "model.layers.10.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.self_attn.rope": false,
-     "model.layers.10.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.10.input_layernorm": false,
-     "model.layers.10.post_attention_layernorm": false,
-     "model.layers.11.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.11.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.11.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.11.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.11.self_attn.rope": false,
-     "model.layers.11.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.11.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.11.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.11.input_layernorm": false,
-     "model.layers.11.post_attention_layernorm": false,
-     "model.layers.12.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.self_attn.rope": false,
-     "model.layers.12.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.12.input_layernorm": false,
-     "model.layers.12.post_attention_layernorm": false,
-     "model.layers.13.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.self_attn.rope": false,
-     "model.layers.13.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.13.input_layernorm": false,
-     "model.layers.13.post_attention_layernorm": false,
-     "model.layers.14.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.14.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.14.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.14.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.14.self_attn.rope": false,
-     "model.layers.14.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.14.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.14.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.14.input_layernorm": false,
-     "model.layers.14.post_attention_layernorm": false,
-     "model.layers.15.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.self_attn.rope": false,
-     "model.layers.15.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.15.input_layernorm": false,
-     "model.layers.15.post_attention_layernorm": false,
-     "model.layers.16.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.self_attn.rope": false,
-     "model.layers.16.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.16.input_layernorm": false,
-     "model.layers.16.post_attention_layernorm": false,
-     "model.layers.17.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.17.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.17.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.17.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.17.self_attn.rope": false,
-     "model.layers.17.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.17.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.17.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.17.input_layernorm": false,
-     "model.layers.17.post_attention_layernorm": false,
-     "model.layers.18.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.self_attn.rope": false,
-     "model.layers.18.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.18.input_layernorm": false,
-     "model.layers.18.post_attention_layernorm": false,
-     "model.layers.19.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.self_attn.rope": false,
-     "model.layers.19.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.19.input_layernorm": false,
-     "model.layers.19.post_attention_layernorm": false,
-     "model.layers.20.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.20.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.20.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.20.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.20.self_attn.rope": false,
-     "model.layers.20.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.20.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.20.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.20.input_layernorm": false,
-     "model.layers.20.post_attention_layernorm": false,
-     "model.layers.21.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.self_attn.rope": false,
-     "model.layers.21.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.21.input_layernorm": false,
-     "model.layers.21.post_attention_layernorm": false,
-     "model.layers.22.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.self_attn.rope": false,
-     "model.layers.22.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.22.input_layernorm": false,
-     "model.layers.22.post_attention_layernorm": false,
-     "model.layers.23.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.23.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.23.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.23.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.23.self_attn.rope": false,
-     "model.layers.23.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.23.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.23.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.23.input_layernorm": false,
-     "model.layers.23.post_attention_layernorm": false,
-     "model.layers.24.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.24.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.24.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.24.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.24.self_attn.rope": false,
-     "model.layers.24.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.24.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.24.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.24.input_layernorm": false,
-     "model.layers.24.post_attention_layernorm": false,
-     "model.layers.25.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.25.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.25.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.25.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.25.self_attn.rope": false,
-     "model.layers.25.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.25.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.25.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.25.input_layernorm": false,
-     "model.layers.25.post_attention_layernorm": false,
-     "model.layers.26.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.26.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.26.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.26.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.26.self_attn.rope": false,
-     "model.layers.26.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.26.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.26.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.26.input_layernorm": false,
-     "model.layers.26.post_attention_layernorm": false,
-     "model.layers.27.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.27.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.27.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.27.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.27.self_attn.rope": false,
-     "model.layers.27.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.27.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.27.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 2
-     },
-     "model.layers.27.input_layernorm": false,
-     "model.layers.27.post_attention_layernorm": false,
-     "model.norm": false,
-     "lm_head": {
-       "group_size": 64,
-       "bits": 4
-     }
-   },
-   "rms_norm_eps": 1e-06,
-   "rope_theta": 10000,
-   "sliding_window": 4096,
-   "tie_word_embeddings": false,
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.44.0",
-   "use_cache": true,
-   "use_mrope": false,
-   "use_sliding_window": false,
-   "vocab_size": 152064
- }
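
The pattern in the deleted config above is easy to read off: everything defaults to 2-bit groups of 64, while lm_head and the v_proj/down_proj of the first three layers, the last four layers, and every third layer in between are boosted to 4 bits (the same map appears twice, under "quantization" and "quantization_config", the latter mirroring Hugging Face conventions). This is the kind of recipe mlx-lm's converter can produce via a quantization predicate. A sketch under stated assumptions: it assumes a recent mlx-lm whose convert() accepts a quant_predicate callback, and the layer selection below is inferred from this config.json, not taken from the uploader's actual conversion script.

from mlx_lm import convert

def mixed_2_4(path, module, config):
    # Modules with no quantized form (norms, rope) are left untouched,
    # matching the "false" entries in the config above.
    if not hasattr(module, "to_quantized"):
        return False
    # The output head is always boosted to 4 bits, as in the config.
    if "lm_head" in path:
        return {"group_size": 64, "bits": 4}
    parts = path.split(".")
    if "layers" in parts:
        idx = int(parts[parts.index("layers") + 1])
        # First 3 layers, last 4, and every third layer in between get
        # 4-bit v_proj/down_proj (the pattern visible in the diff).
        boosted = idx < 3 or idx >= 24 or idx % 3 == 2
        if boosted and ("v_proj" in path or "down_proj" in path):
            return {"group_size": 64, "bits": 4}
    # Everything else, including the embedding table, gets 2 bits.
    return {"group_size": 64, "bits": 2}

convert(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    mlx_path="DeepSeek-R1-Distill-Qwen-7B-2,6_mixed",
    quantize=True,
    quant_predicate=mixed_2_4,
)
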
DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:36644c61980cd34168d170bc9bb5897f51f95e3727f30aa4af280bbcf7b06785
- size 2760830621
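
What was deleted here is not the ~2.76 GB weights file itself but its Git LFS pointer: three lines recording the spec version, the SHA-256 of the real object, and its size in bytes. A stdlib-only sketch of parsing such a pointer, e.g. to audit blob sizes before a full checkout; the path is illustrative:

def read_lfs_pointer(path):
    # Each pointer line is "key value"; collect them into a dict.
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = read_lfs_pointer("DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/model.safetensors")
print(ptr["oid"])        # sha256:36644c61...
print(int(ptr["size"]))  # 2760830621 bytes
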
DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/model.safetensors.index.json DELETED
@@ -1,742 +0,0 @@
- {
- "metadata": {
- "total_size": 2760748032
- },
- "weight_map": {
- "lm_head.biases": "model.safetensors",
- "lm_head.scales": "model.safetensors",
- "lm_head.weight": "model.safetensors",
- "model.embed_tokens.biases": "model.safetensors",
- "model.embed_tokens.scales": "model.safetensors",
- "model.embed_tokens.weight": "model.safetensors",
- "model.layers.0.input_layernorm.weight": "model.safetensors",
- "model.layers.0.mlp.down_proj.biases": "model.safetensors",
- "model.layers.0.mlp.down_proj.scales": "model.safetensors",
- "model.layers.0.mlp.down_proj.weight": "model.safetensors",
- "model.layers.0.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.0.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.0.mlp.up_proj.biases": "model.safetensors",
- "model.layers.0.mlp.up_proj.scales": "model.safetensors",
- "model.layers.0.mlp.up_proj.weight": "model.safetensors",
- "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.0.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.1.input_layernorm.weight": "model.safetensors",
- "model.layers.1.mlp.down_proj.biases": "model.safetensors",
- "model.layers.1.mlp.down_proj.scales": "model.safetensors",
- "model.layers.1.mlp.down_proj.weight": "model.safetensors",
- "model.layers.1.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.1.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.1.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.1.mlp.up_proj.biases": "model.safetensors",
- "model.layers.1.mlp.up_proj.scales": "model.safetensors",
- "model.layers.1.mlp.up_proj.weight": "model.safetensors",
- "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.1.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.10.input_layernorm.weight": "model.safetensors",
- "model.layers.10.mlp.down_proj.biases": "model.safetensors",
- "model.layers.10.mlp.down_proj.scales": "model.safetensors",
- "model.layers.10.mlp.down_proj.weight": "model.safetensors",
- "model.layers.10.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.10.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.10.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.10.mlp.up_proj.biases": "model.safetensors",
- "model.layers.10.mlp.up_proj.scales": "model.safetensors",
- "model.layers.10.mlp.up_proj.weight": "model.safetensors",
- "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.10.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.11.input_layernorm.weight": "model.safetensors",
- "model.layers.11.mlp.down_proj.biases": "model.safetensors",
- "model.layers.11.mlp.down_proj.scales": "model.safetensors",
- "model.layers.11.mlp.down_proj.weight": "model.safetensors",
- "model.layers.11.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.11.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.11.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.11.mlp.up_proj.biases": "model.safetensors",
- "model.layers.11.mlp.up_proj.scales": "model.safetensors",
- "model.layers.11.mlp.up_proj.weight": "model.safetensors",
- "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.11.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.12.input_layernorm.weight": "model.safetensors",
- "model.layers.12.mlp.down_proj.biases": "model.safetensors",
- "model.layers.12.mlp.down_proj.scales": "model.safetensors",
- "model.layers.12.mlp.down_proj.weight": "model.safetensors",
- "model.layers.12.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.12.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.12.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.12.mlp.up_proj.biases": "model.safetensors",
- "model.layers.12.mlp.up_proj.scales": "model.safetensors",
- "model.layers.12.mlp.up_proj.weight": "model.safetensors",
- "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.12.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.13.input_layernorm.weight": "model.safetensors",
- "model.layers.13.mlp.down_proj.biases": "model.safetensors",
- "model.layers.13.mlp.down_proj.scales": "model.safetensors",
- "model.layers.13.mlp.down_proj.weight": "model.safetensors",
- "model.layers.13.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.13.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.13.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.13.mlp.up_proj.biases": "model.safetensors",
- "model.layers.13.mlp.up_proj.scales": "model.safetensors",
- "model.layers.13.mlp.up_proj.weight": "model.safetensors",
- "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.13.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.13.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.13.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.13.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.13.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.13.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.13.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.14.input_layernorm.weight": "model.safetensors",
- "model.layers.14.mlp.down_proj.biases": "model.safetensors",
- "model.layers.14.mlp.down_proj.scales": "model.safetensors",
- "model.layers.14.mlp.down_proj.weight": "model.safetensors",
- "model.layers.14.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.14.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.14.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.14.mlp.up_proj.biases": "model.safetensors",
- "model.layers.14.mlp.up_proj.scales": "model.safetensors",
- "model.layers.14.mlp.up_proj.weight": "model.safetensors",
- "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.14.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.14.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.14.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.14.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.14.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.14.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.14.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.15.input_layernorm.weight": "model.safetensors",
- "model.layers.15.mlp.down_proj.biases": "model.safetensors",
- "model.layers.15.mlp.down_proj.scales": "model.safetensors",
- "model.layers.15.mlp.down_proj.weight": "model.safetensors",
- "model.layers.15.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.15.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.15.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.15.mlp.up_proj.biases": "model.safetensors",
- "model.layers.15.mlp.up_proj.scales": "model.safetensors",
- "model.layers.15.mlp.up_proj.weight": "model.safetensors",
- "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.15.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.15.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.15.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.15.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.15.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.15.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.15.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.15.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.15.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.15.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.15.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.15.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.15.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.16.input_layernorm.weight": "model.safetensors",
- "model.layers.16.mlp.down_proj.biases": "model.safetensors",
- "model.layers.16.mlp.down_proj.scales": "model.safetensors",
- "model.layers.16.mlp.down_proj.weight": "model.safetensors",
- "model.layers.16.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.16.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.16.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.16.mlp.up_proj.biases": "model.safetensors",
- "model.layers.16.mlp.up_proj.scales": "model.safetensors",
- "model.layers.16.mlp.up_proj.weight": "model.safetensors",
- "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.16.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.16.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.16.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.16.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.16.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.16.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.16.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.16.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.16.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.16.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.16.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.16.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.16.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.17.input_layernorm.weight": "model.safetensors",
- "model.layers.17.mlp.down_proj.biases": "model.safetensors",
- "model.layers.17.mlp.down_proj.scales": "model.safetensors",
- "model.layers.17.mlp.down_proj.weight": "model.safetensors",
- "model.layers.17.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.17.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.17.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.17.mlp.up_proj.biases": "model.safetensors",
- "model.layers.17.mlp.up_proj.scales": "model.safetensors",
- "model.layers.17.mlp.up_proj.weight": "model.safetensors",
- "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.17.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.17.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.17.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.17.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.17.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.17.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.17.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.17.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.17.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.17.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.17.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.17.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.17.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.18.input_layernorm.weight": "model.safetensors",
- "model.layers.18.mlp.down_proj.biases": "model.safetensors",
- "model.layers.18.mlp.down_proj.scales": "model.safetensors",
- "model.layers.18.mlp.down_proj.weight": "model.safetensors",
- "model.layers.18.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.18.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.18.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.18.mlp.up_proj.biases": "model.safetensors",
- "model.layers.18.mlp.up_proj.scales": "model.safetensors",
- "model.layers.18.mlp.up_proj.weight": "model.safetensors",
- "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.18.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.18.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.18.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.18.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.18.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.18.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.18.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.18.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.18.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.18.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.18.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.18.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.18.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.19.input_layernorm.weight": "model.safetensors",
- "model.layers.19.mlp.down_proj.biases": "model.safetensors",
- "model.layers.19.mlp.down_proj.scales": "model.safetensors",
- "model.layers.19.mlp.down_proj.weight": "model.safetensors",
- "model.layers.19.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.19.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.19.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.19.mlp.up_proj.biases": "model.safetensors",
- "model.layers.19.mlp.up_proj.scales": "model.safetensors",
- "model.layers.19.mlp.up_proj.weight": "model.safetensors",
- "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.19.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.19.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.19.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.19.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.19.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.19.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.19.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.19.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.19.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.19.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.19.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.19.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.19.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.2.input_layernorm.weight": "model.safetensors",
- "model.layers.2.mlp.down_proj.biases": "model.safetensors",
- "model.layers.2.mlp.down_proj.scales": "model.safetensors",
- "model.layers.2.mlp.down_proj.weight": "model.safetensors",
- "model.layers.2.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.2.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.2.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.2.mlp.up_proj.biases": "model.safetensors",
- "model.layers.2.mlp.up_proj.scales": "model.safetensors",
- "model.layers.2.mlp.up_proj.weight": "model.safetensors",
- "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.2.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.2.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.2.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.2.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.2.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.2.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.2.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.2.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.2.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.2.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.2.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.2.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.20.input_layernorm.weight": "model.safetensors",
- "model.layers.20.mlp.down_proj.biases": "model.safetensors",
- "model.layers.20.mlp.down_proj.scales": "model.safetensors",
- "model.layers.20.mlp.down_proj.weight": "model.safetensors",
- "model.layers.20.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.20.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.20.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.20.mlp.up_proj.biases": "model.safetensors",
- "model.layers.20.mlp.up_proj.scales": "model.safetensors",
- "model.layers.20.mlp.up_proj.weight": "model.safetensors",
- "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.20.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.20.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.20.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.20.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.20.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.20.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.20.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.20.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.20.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.20.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.20.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.20.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.20.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.21.input_layernorm.weight": "model.safetensors",
- "model.layers.21.mlp.down_proj.biases": "model.safetensors",
- "model.layers.21.mlp.down_proj.scales": "model.safetensors",
- "model.layers.21.mlp.down_proj.weight": "model.safetensors",
- "model.layers.21.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.21.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.21.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.21.mlp.up_proj.biases": "model.safetensors",
- "model.layers.21.mlp.up_proj.scales": "model.safetensors",
- "model.layers.21.mlp.up_proj.weight": "model.safetensors",
- "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.21.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.21.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.21.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.21.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.21.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.21.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.21.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.21.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.21.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.21.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.21.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.21.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.21.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.22.input_layernorm.weight": "model.safetensors",
- "model.layers.22.mlp.down_proj.biases": "model.safetensors",
- "model.layers.22.mlp.down_proj.scales": "model.safetensors",
- "model.layers.22.mlp.down_proj.weight": "model.safetensors",
- "model.layers.22.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.22.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.22.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.22.mlp.up_proj.biases": "model.safetensors",
- "model.layers.22.mlp.up_proj.scales": "model.safetensors",
- "model.layers.22.mlp.up_proj.weight": "model.safetensors",
- "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.22.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.22.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.22.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.22.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.22.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.22.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.22.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.22.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.22.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.22.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.22.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.22.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.22.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.23.input_layernorm.weight": "model.safetensors",
- "model.layers.23.mlp.down_proj.biases": "model.safetensors",
- "model.layers.23.mlp.down_proj.scales": "model.safetensors",
- "model.layers.23.mlp.down_proj.weight": "model.safetensors",
- "model.layers.23.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.23.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.23.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.23.mlp.up_proj.biases": "model.safetensors",
- "model.layers.23.mlp.up_proj.scales": "model.safetensors",
- "model.layers.23.mlp.up_proj.weight": "model.safetensors",
- "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.23.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.23.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.23.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.23.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.23.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.23.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.23.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.23.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.23.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.23.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.23.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.23.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.23.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.24.input_layernorm.weight": "model.safetensors",
- "model.layers.24.mlp.down_proj.biases": "model.safetensors",
- "model.layers.24.mlp.down_proj.scales": "model.safetensors",
- "model.layers.24.mlp.down_proj.weight": "model.safetensors",
- "model.layers.24.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.24.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.24.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.24.mlp.up_proj.biases": "model.safetensors",
- "model.layers.24.mlp.up_proj.scales": "model.safetensors",
- "model.layers.24.mlp.up_proj.weight": "model.safetensors",
- "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.24.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.24.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.24.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.24.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.24.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.24.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.24.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.24.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.24.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.24.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.24.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.24.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.24.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.25.input_layernorm.weight": "model.safetensors",
- "model.layers.25.mlp.down_proj.biases": "model.safetensors",
- "model.layers.25.mlp.down_proj.scales": "model.safetensors",
- "model.layers.25.mlp.down_proj.weight": "model.safetensors",
- "model.layers.25.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.25.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.25.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.25.mlp.up_proj.biases": "model.safetensors",
- "model.layers.25.mlp.up_proj.scales": "model.safetensors",
- "model.layers.25.mlp.up_proj.weight": "model.safetensors",
- "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.25.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.25.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.25.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.25.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.25.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.25.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.25.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.25.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.25.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.25.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.25.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.25.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.25.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.26.input_layernorm.weight": "model.safetensors",
- "model.layers.26.mlp.down_proj.biases": "model.safetensors",
- "model.layers.26.mlp.down_proj.scales": "model.safetensors",
- "model.layers.26.mlp.down_proj.weight": "model.safetensors",
- "model.layers.26.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.26.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.26.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.26.mlp.up_proj.biases": "model.safetensors",
- "model.layers.26.mlp.up_proj.scales": "model.safetensors",
- "model.layers.26.mlp.up_proj.weight": "model.safetensors",
- "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.26.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.26.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.26.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.26.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.26.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.26.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.26.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.26.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.26.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.26.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.26.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.26.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.26.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.27.input_layernorm.weight": "model.safetensors",
- "model.layers.27.mlp.down_proj.biases": "model.safetensors",
- "model.layers.27.mlp.down_proj.scales": "model.safetensors",
- "model.layers.27.mlp.down_proj.weight": "model.safetensors",
- "model.layers.27.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.27.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.27.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.27.mlp.up_proj.biases": "model.safetensors",
- "model.layers.27.mlp.up_proj.scales": "model.safetensors",
- "model.layers.27.mlp.up_proj.weight": "model.safetensors",
- "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.27.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.27.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.27.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.27.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.27.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.27.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.27.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.27.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.27.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.27.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.27.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.27.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.27.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.3.input_layernorm.weight": "model.safetensors",
- "model.layers.3.mlp.down_proj.biases": "model.safetensors",
- "model.layers.3.mlp.down_proj.scales": "model.safetensors",
- "model.layers.3.mlp.down_proj.weight": "model.safetensors",
- "model.layers.3.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.3.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.3.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.3.mlp.up_proj.biases": "model.safetensors",
- "model.layers.3.mlp.up_proj.scales": "model.safetensors",
- "model.layers.3.mlp.up_proj.weight": "model.safetensors",
- "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.3.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.3.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.3.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.3.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.3.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.3.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.3.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.3.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.3.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.3.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.3.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.3.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.3.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.4.input_layernorm.weight": "model.safetensors",
- "model.layers.4.mlp.down_proj.biases": "model.safetensors",
- "model.layers.4.mlp.down_proj.scales": "model.safetensors",
- "model.layers.4.mlp.down_proj.weight": "model.safetensors",
- "model.layers.4.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.4.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.4.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.4.mlp.up_proj.biases": "model.safetensors",
- "model.layers.4.mlp.up_proj.scales": "model.safetensors",
- "model.layers.4.mlp.up_proj.weight": "model.safetensors",
- "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.4.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.4.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.4.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.4.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.4.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.4.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.4.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.4.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.4.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.4.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.4.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.4.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.4.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.5.input_layernorm.weight": "model.safetensors",
- "model.layers.5.mlp.down_proj.biases": "model.safetensors",
- "model.layers.5.mlp.down_proj.scales": "model.safetensors",
- "model.layers.5.mlp.down_proj.weight": "model.safetensors",
- "model.layers.5.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.5.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.5.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.5.mlp.up_proj.biases": "model.safetensors",
- "model.layers.5.mlp.up_proj.scales": "model.safetensors",
- "model.layers.5.mlp.up_proj.weight": "model.safetensors",
- "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.5.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.5.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.5.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.5.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.5.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.5.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.5.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.5.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.5.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.5.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.5.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.5.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.5.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.6.input_layernorm.weight": "model.safetensors",
- "model.layers.6.mlp.down_proj.biases": "model.safetensors",
- "model.layers.6.mlp.down_proj.scales": "model.safetensors",
- "model.layers.6.mlp.down_proj.weight": "model.safetensors",
- "model.layers.6.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.6.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.6.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.6.mlp.up_proj.biases": "model.safetensors",
- "model.layers.6.mlp.up_proj.scales": "model.safetensors",
- "model.layers.6.mlp.up_proj.weight": "model.safetensors",
- "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.6.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.6.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.6.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.6.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.6.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.6.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.6.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.6.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.6.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.6.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.6.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.6.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.6.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.7.input_layernorm.weight": "model.safetensors",
- "model.layers.7.mlp.down_proj.biases": "model.safetensors",
- "model.layers.7.mlp.down_proj.scales": "model.safetensors",
- "model.layers.7.mlp.down_proj.weight": "model.safetensors",
- "model.layers.7.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.7.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.7.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.7.mlp.up_proj.biases": "model.safetensors",
- "model.layers.7.mlp.up_proj.scales": "model.safetensors",
- "model.layers.7.mlp.up_proj.weight": "model.safetensors",
- "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.7.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.7.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.7.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.7.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.7.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.7.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.7.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.7.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.7.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.7.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.7.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.7.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.7.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.8.input_layernorm.weight": "model.safetensors",
- "model.layers.8.mlp.down_proj.biases": "model.safetensors",
- "model.layers.8.mlp.down_proj.scales": "model.safetensors",
- "model.layers.8.mlp.down_proj.weight": "model.safetensors",
- "model.layers.8.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.8.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.8.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.8.mlp.up_proj.biases": "model.safetensors",
- "model.layers.8.mlp.up_proj.scales": "model.safetensors",
- "model.layers.8.mlp.up_proj.weight": "model.safetensors",
- "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.8.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.8.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.8.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.8.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.8.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.8.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.8.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.8.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.8.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.8.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.8.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.8.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.8.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.9.input_layernorm.weight": "model.safetensors",
- "model.layers.9.mlp.down_proj.biases": "model.safetensors",
- "model.layers.9.mlp.down_proj.scales": "model.safetensors",
- "model.layers.9.mlp.down_proj.weight": "model.safetensors",
- "model.layers.9.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.9.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.9.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.9.mlp.up_proj.biases": "model.safetensors",
- "model.layers.9.mlp.up_proj.scales": "model.safetensors",
- "model.layers.9.mlp.up_proj.weight": "model.safetensors",
- "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.9.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.9.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.9.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.9.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.9.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.9.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.9.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.9.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.9.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.9.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.9.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.9.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.9.self_attn.v_proj.weight": "model.safetensors",
- "model.norm.weight": "model.safetensors"
- }
- }
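
The export is a single shard, but the index still maps every tensor name to the file that holds it, including the `.scales` and `.biases` that accompany each quantized `.weight` in MLX exports (the plain `.bias` entries on q/k/v are Qwen2's own attention biases, distinct from the quantization `.biases`). A minimal sketch of resolving one tensor through `weight_map`; the directory and tensor names below are illustrative:

```python
import json
import os

from safetensors import safe_open  # pip install safetensors

def load_tensor(model_dir: str, tensor_name: str):
    """Find the shard for `tensor_name` via the index, then load just it."""
    with open(os.path.join(model_dir, "model.safetensors.index.json")) as f:
        index = json.load(f)
    shard = index["weight_map"][tensor_name]  # here always "model.safetensors"
    with safe_open(os.path.join(model_dir, shard), framework="numpy") as st:
        return st.get_tensor(tensor_name)

# e.g. one packed quantized weight plus its per-group scales:
# w = load_tensor("DeepSeek-R1-Distill-Qwen-7B-2,6_mixed", "model.layers.0.mlp.up_proj.weight")
# s = load_tensor("DeepSeek-R1-Distill-Qwen-7B-2,6_mixed", "model.layers.0.mlp.up_proj.scales")
```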
 
DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
- {
- "bos_token": {
- "content": "<|begin▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "eos_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "pad_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- }
- }
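
The same map ships with every quant in this PR; note that `<|end▁of▁sentence|>` doubles as both eos and pad. A quick sketch of how these declarations surface once a quant folder is on disk (the local path is illustrative):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("DeepSeek-R1-Distill-Qwen-7B-2,6_mixed")
print(tok.bos_token)  # <|begin▁of▁sentence|>
print(tok.eos_token)  # <|end▁of▁sentence|>
print(tok.pad_token)  # same token as eos in this export
```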
 
DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/tokenizer.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
- size 11422778
 
DeepSeek-R1-Distill-Qwen-7B-2,6_mixed/tokenizer_config.json DELETED
@@ -1,195 +0,0 @@
1
- {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "add_prefix_space": null,
5
- "added_tokens_decoder": {
6
- "151643": {
7
- "content": "<|end▁of▁sentence|>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false,
12
- "special": true
13
- },
14
- "151644": {
15
- "content": "<|User|>",
16
- "lstrip": false,
17
- "normalized": false,
18
- "rstrip": false,
19
- "single_word": false,
20
- "special": false
21
- },
22
- "151645": {
23
- "content": "<|Assistant|>",
24
- "lstrip": false,
25
- "normalized": false,
26
- "rstrip": false,
27
- "single_word": false,
28
- "special": false
29
- },
30
- "151646": {
31
- "content": "<|begin▁of▁sentence|>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false,
36
- "special": true
37
- },
38
- "151647": {
39
- "content": "<|EOT|>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false,
44
- "special": false
45
- },
46
- "151648": {
47
- "content": "<think>",
48
- "lstrip": false,
49
- "normalized": false,
50
- "rstrip": false,
51
- "single_word": false,
52
- "special": false
53
- },
54
- "151649": {
55
- "content": "</think>",
56
- "lstrip": false,
57
- "normalized": false,
58
- "rstrip": false,
59
- "single_word": false,
60
- "special": false
61
- },
62
- "151650": {
63
- "content": "<|quad_start|>",
64
- "lstrip": false,
65
- "normalized": false,
66
- "rstrip": false,
67
- "single_word": false,
68
- "special": true
69
- },
70
- "151651": {
71
- "content": "<|quad_end|>",
72
- "lstrip": false,
73
- "normalized": false,
74
- "rstrip": false,
75
- "single_word": false,
76
- "special": true
77
- },
78
- "151652": {
79
- "content": "<|vision_start|>",
80
- "lstrip": false,
81
- "normalized": false,
82
- "rstrip": false,
83
- "single_word": false,
84
- "special": true
85
- },
86
- "151653": {
87
- "content": "<|vision_end|>",
88
- "lstrip": false,
89
- "normalized": false,
90
- "rstrip": false,
91
- "single_word": false,
92
- "special": true
93
- },
94
- "151654": {
95
- "content": "<|vision_pad|>",
96
- "lstrip": false,
97
- "normalized": false,
98
- "rstrip": false,
99
- "single_word": false,
100
- "special": true
101
- },
102
- "151655": {
103
- "content": "<|image_pad|>",
104
- "lstrip": false,
105
- "normalized": false,
106
- "rstrip": false,
107
- "single_word": false,
108
- "special": true
109
- },
110
- "151656": {
111
- "content": "<|video_pad|>",
112
- "lstrip": false,
113
- "normalized": false,
114
- "rstrip": false,
115
- "single_word": false,
116
- "special": true
117
- },
118
- "151657": {
119
- "content": "<tool_call>",
120
- "lstrip": false,
121
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151658": {
- "content": "</tool_call>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151659": {
- "content": "<|fim_prefix|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151660": {
- "content": "<|fim_middle|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151661": {
- "content": "<|fim_suffix|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151662": {
- "content": "<|fim_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151663": {
- "content": "<|repo_name|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151664": {
- "content": "<|file_sep|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- }
- },
- "bos_token": "<|begin▁of▁sentence|>",
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
- "clean_up_tokenization_spaces": false,
- "eos_token": "<|end▁of▁sentence|>",
- "extra_special_tokens": {},
- "legacy": true,
- "model_max_length": 16384,
- "pad_token": "<|end▁of▁sentence|>",
- "sp_model_kwargs": {},
- "tokenizer_class": "LlamaTokenizerFast",
- "unk_token": null,
- "use_default_system_prompt": false
- }
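
The template above is the DeepSeek-R1 chat format: it replays the system prompt after the BOS token, wraps user turns in `<|User|>`, drops any `<think>...</think>` reasoning from earlier assistant turns, and, when `add_generation_prompt` is set, ends the prompt with `<|Assistant|><think>\n`. A minimal sketch of rendering it with Hugging Face `transformers` (the local folder name is an assumption; every quant folder in this PR ships the same tokenizer files):

```python
# Minimal sketch, not part of this repo: render the chat template above.
# The folder path is an assumption; point it at whichever quant you downloaded.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("DeepSeek-R1-Distill-Qwen-7B-3,4_mixed")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is 7 * 6?"},
]

# With add_generation_prompt=True the template ends the string with
# "<|Assistant|><think>\n", so the distill opens a reasoning block right away.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
```

The rendered string works as a raw prompt for any of the quantized variants, since only the weights differ between folders.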
DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/config.json DELETED
@@ -1,1790 +0,0 @@
- {
- "architectures": [
- "Qwen2ForCausalLM"
- ],
- "attention_dropout": 0.0,
- "bos_token_id": 151643,
- "eos_token_id": 151643,
- "hidden_act": "silu",
- "hidden_size": 3584,
- "initializer_range": 0.02,
- "intermediate_size": 18944,
- "max_position_embeddings": 131072,
- "max_window_layers": 28,
- "model_type": "qwen2",
- "num_attention_heads": 28,
- "num_hidden_layers": 28,
- "num_key_value_heads": 4,
- "quantization": {
- "group_size": 64,
- "bits": null,
- "model.embed_tokens": {
- "group_size": 64,
- "bits": 3
- },
- "model.layers.0.self_attn.q_proj": {
- "group_size": 64,
- "bits": 3
- },
- "model.layers.0.self_attn.k_proj": {
- "group_size": 64,
- "bits": 3
- },
- "model.layers.0.self_attn.v_proj": {
- "group_size": 64,
- "bits": 4
- },
- "model.layers.0.self_attn.o_proj": {
- "group_size": 64,
- "bits": 3
- },
- "model.layers.0.self_attn.rope": false,
- "model.layers.0.mlp.gate_proj": {
- "group_size": 64,
- "bits": 3
- },
- "model.layers.0.mlp.down_proj": {
- "group_size": 64,
- "bits": 4
- },
- "model.layers.0.mlp.up_proj": {
- "group_size": 64,
- "bits": 3
- },
- "model.layers.0.input_layernorm": false,
- "model.layers.0.post_attention_layernorm": false,
  […model.layers.1 through model.layers.27 repeat this shape, all at group_size 64: v_proj and down_proj are 4-bit on layers 1, 2, 5, 8, 11, 14, 17, 20, 23, 24, 25, 26 and 27 and 3-bit on the remaining layers; q_proj, k_proj, o_proj, gate_proj and up_proj stay 3-bit; rope and both layernorms are false (unquantized) on every layer…]
- "model.norm": false,
- "lm_head": {
- "group_size": 64,
- "bits": 4
- }
- },
- "quantization_config": {
  […identical, module for module, to the "quantization" block above…]
- },
- "rms_norm_eps": 1e-06,
- "rope_theta": 10000,
- "sliding_window": 4096,
- "tie_word_embeddings": false,
- "torch_dtype": "bfloat16",
- "transformers_version": "4.44.0",
- "use_cache": true,
- "use_mrope": false,
- "use_sliding_window": false,
- "vocab_size": 152064
- }
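
The per-module table above is what makes this the "3,4_mixed" variant: everything defaults to 3 bits at group size 64, while `v_proj` and `down_proj` on roughly half the layers, plus `lm_head`, get 4 bits, and norms and rope stay unquantized. A rough sketch of turning that table into a footprint estimate (the config path is an assumption, and per-group scale/bias overhead, about 0.5 extra bits per weight at group size 64, is left out):

```python
# Rough sketch, not part of this repo: walk the "quantization" table above
# and estimate the average bits per quantized weight for this mixed recipe.
import json

with open("DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/config.json") as f:  # assumed path
    cfg = json.load(f)

hidden = cfg["hidden_size"]                      # 3584
inter = cfg["intermediate_size"]                 # 18944
vocab = cfg["vocab_size"]                        # 152064
head_dim = hidden // cfg["num_attention_heads"]  # 128
kv_dim = head_dim * cfg["num_key_value_heads"]   # 512

# Weight-matrix element counts per quantized module family (bias vectors ignored).
param_counts = {
    "q_proj": hidden * hidden,
    "k_proj": hidden * kv_dim,
    "v_proj": hidden * kv_dim,
    "o_proj": hidden * hidden,
    "gate_proj": hidden * inter,
    "up_proj": hidden * inter,
    "down_proj": inter * hidden,
    "embed_tokens": vocab * hidden,
    "lm_head": vocab * hidden,
}

total = bits = 0
for name, spec in cfg["quantization"].items():
    if not isinstance(spec, dict):  # skips the defaults plus rope/norm entries
        continue
    n = param_counts[name.rsplit(".", 1)[-1]]
    total += n
    bits += n * spec["bits"]

print(f"average: {bits / total:.2f} bits/weight over {total / 1e9:.2f}B params")
```

On this config that comes out near 3.2 bits per weight over about 7.6B parameters; adding the group scale/bias overhead lands close to the ~3.5 GB `model.safetensors` pointer recorded below.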
DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c15fdbbbe929593ed3932fd81869fc4d933469b2db5330acf52a4b43d9975143
- size 3522588433
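
What Git keeps for the deleted `model.safetensors` is a Git LFS pointer rather than the weights themselves: three key-value lines carrying the spec version, the SHA-256 of the real blob, and its byte size (about 3.52 GB here, consistent with the mixed 3/4-bit estimate above). A small sketch of parsing one (the helper is hypothetical, not from any library):

```python
# Hypothetical helper: split a Git LFS pointer like the one above into fields.
def parse_lfs_pointer(text: str) -> dict:
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "sha256": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:c15fdbbbe929593ed3932fd81869fc4d933469b2db5330acf52a4b43d9975143
size 3522588433"""

info = parse_lfs_pointer(pointer)
print(f"{info['size_bytes'] / 1e9:.2f} GB, sha256 {info['sha256'][:12]}...")
```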
DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/model.safetensors.index.json DELETED
@@ -1,742 +0,0 @@
- {
- "metadata": {
- "total_size": 3522505728
- },
- "weight_map": {
- "lm_head.biases": "model.safetensors",
- "lm_head.scales": "model.safetensors",
- "lm_head.weight": "model.safetensors",
- "model.embed_tokens.biases": "model.safetensors",
- "model.embed_tokens.scales": "model.safetensors",
- "model.embed_tokens.weight": "model.safetensors",
- "model.layers.0.input_layernorm.weight": "model.safetensors",
- "model.layers.0.mlp.down_proj.biases": "model.safetensors",
- "model.layers.0.mlp.down_proj.scales": "model.safetensors",
- "model.layers.0.mlp.down_proj.weight": "model.safetensors",
- "model.layers.0.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.0.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.0.mlp.up_proj.biases": "model.safetensors",
- "model.layers.0.mlp.up_proj.scales": "model.safetensors",
- "model.layers.0.mlp.up_proj.weight": "model.safetensors",
- "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.0.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.1.input_layernorm.weight": "model.safetensors",
- "model.layers.1.mlp.down_proj.biases": "model.safetensors",
- "model.layers.1.mlp.down_proj.scales": "model.safetensors",
- "model.layers.1.mlp.down_proj.weight": "model.safetensors",
- "model.layers.1.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.1.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.1.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.1.mlp.up_proj.biases": "model.safetensors",
- "model.layers.1.mlp.up_proj.scales": "model.safetensors",
- "model.layers.1.mlp.up_proj.weight": "model.safetensors",
- "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.1.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.10.input_layernorm.weight": "model.safetensors",
- "model.layers.10.mlp.down_proj.biases": "model.safetensors",
- "model.layers.10.mlp.down_proj.scales": "model.safetensors",
- "model.layers.10.mlp.down_proj.weight": "model.safetensors",
- "model.layers.10.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.10.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.10.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.10.mlp.up_proj.biases": "model.safetensors",
- "model.layers.10.mlp.up_proj.scales": "model.safetensors",
- "model.layers.10.mlp.up_proj.weight": "model.safetensors",
- "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.10.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.11.input_layernorm.weight": "model.safetensors",
- "model.layers.11.mlp.down_proj.biases": "model.safetensors",
- "model.layers.11.mlp.down_proj.scales": "model.safetensors",
- "model.layers.11.mlp.down_proj.weight": "model.safetensors",
- "model.layers.11.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.11.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.11.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.11.mlp.up_proj.biases": "model.safetensors",
- "model.layers.11.mlp.up_proj.scales": "model.safetensors",
- "model.layers.11.mlp.up_proj.weight": "model.safetensors",
- "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.11.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.12.input_layernorm.weight": "model.safetensors",
- "model.layers.12.mlp.down_proj.biases": "model.safetensors",
- "model.layers.12.mlp.down_proj.scales": "model.safetensors",
- "model.layers.12.mlp.down_proj.weight": "model.safetensors",
- "model.layers.12.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.12.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.12.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.12.mlp.up_proj.biases": "model.safetensors",
- "model.layers.12.mlp.up_proj.scales": "model.safetensors",
- "model.layers.12.mlp.up_proj.weight": "model.safetensors",
- "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.12.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.13.input_layernorm.weight": "model.safetensors",
- "model.layers.13.mlp.down_proj.biases": "model.safetensors",
- "model.layers.13.mlp.down_proj.scales": "model.safetensors",
- "model.layers.13.mlp.down_proj.weight": "model.safetensors",
- "model.layers.13.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.13.mlp.gate_proj.scales": "model.safetensors",
148
- "model.layers.13.mlp.gate_proj.weight": "model.safetensors",
149
- "model.layers.13.mlp.up_proj.biases": "model.safetensors",
150
- "model.layers.13.mlp.up_proj.scales": "model.safetensors",
151
- "model.layers.13.mlp.up_proj.weight": "model.safetensors",
152
- "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
153
- "model.layers.13.self_attn.k_proj.bias": "model.safetensors",
154
- "model.layers.13.self_attn.k_proj.biases": "model.safetensors",
155
- "model.layers.13.self_attn.k_proj.scales": "model.safetensors",
156
- "model.layers.13.self_attn.k_proj.weight": "model.safetensors",
157
- "model.layers.13.self_attn.o_proj.biases": "model.safetensors",
158
- "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
159
- "model.layers.13.self_attn.o_proj.weight": "model.safetensors",
160
- "model.layers.13.self_attn.q_proj.bias": "model.safetensors",
161
- "model.layers.13.self_attn.q_proj.biases": "model.safetensors",
162
- "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
163
- "model.layers.13.self_attn.q_proj.weight": "model.safetensors",
164
- "model.layers.13.self_attn.v_proj.bias": "model.safetensors",
165
- "model.layers.13.self_attn.v_proj.biases": "model.safetensors",
166
- "model.layers.13.self_attn.v_proj.scales": "model.safetensors",
167
- "model.layers.13.self_attn.v_proj.weight": "model.safetensors",
168
- "model.layers.14.input_layernorm.weight": "model.safetensors",
169
- "model.layers.14.mlp.down_proj.biases": "model.safetensors",
170
- "model.layers.14.mlp.down_proj.scales": "model.safetensors",
171
- "model.layers.14.mlp.down_proj.weight": "model.safetensors",
172
- "model.layers.14.mlp.gate_proj.biases": "model.safetensors",
173
- "model.layers.14.mlp.gate_proj.scales": "model.safetensors",
174
- "model.layers.14.mlp.gate_proj.weight": "model.safetensors",
175
- "model.layers.14.mlp.up_proj.biases": "model.safetensors",
176
- "model.layers.14.mlp.up_proj.scales": "model.safetensors",
177
- "model.layers.14.mlp.up_proj.weight": "model.safetensors",
178
- "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
179
- "model.layers.14.self_attn.k_proj.bias": "model.safetensors",
180
- "model.layers.14.self_attn.k_proj.biases": "model.safetensors",
181
- "model.layers.14.self_attn.k_proj.scales": "model.safetensors",
182
- "model.layers.14.self_attn.k_proj.weight": "model.safetensors",
183
- "model.layers.14.self_attn.o_proj.biases": "model.safetensors",
184
- "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
185
- "model.layers.14.self_attn.o_proj.weight": "model.safetensors",
186
- "model.layers.14.self_attn.q_proj.bias": "model.safetensors",
187
- "model.layers.14.self_attn.q_proj.biases": "model.safetensors",
188
- "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
189
- "model.layers.14.self_attn.q_proj.weight": "model.safetensors",
190
- "model.layers.14.self_attn.v_proj.bias": "model.safetensors",
191
- "model.layers.14.self_attn.v_proj.biases": "model.safetensors",
192
- "model.layers.14.self_attn.v_proj.scales": "model.safetensors",
193
- "model.layers.14.self_attn.v_proj.weight": "model.safetensors",
194
- "model.layers.15.input_layernorm.weight": "model.safetensors",
195
- "model.layers.15.mlp.down_proj.biases": "model.safetensors",
196
- "model.layers.15.mlp.down_proj.scales": "model.safetensors",
197
- "model.layers.15.mlp.down_proj.weight": "model.safetensors",
198
- "model.layers.15.mlp.gate_proj.biases": "model.safetensors",
199
- "model.layers.15.mlp.gate_proj.scales": "model.safetensors",
200
- "model.layers.15.mlp.gate_proj.weight": "model.safetensors",
201
- "model.layers.15.mlp.up_proj.biases": "model.safetensors",
202
- "model.layers.15.mlp.up_proj.scales": "model.safetensors",
203
- "model.layers.15.mlp.up_proj.weight": "model.safetensors",
204
- "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
205
- "model.layers.15.self_attn.k_proj.bias": "model.safetensors",
206
- "model.layers.15.self_attn.k_proj.biases": "model.safetensors",
207
- "model.layers.15.self_attn.k_proj.scales": "model.safetensors",
208
- "model.layers.15.self_attn.k_proj.weight": "model.safetensors",
209
- "model.layers.15.self_attn.o_proj.biases": "model.safetensors",
210
- "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
211
- "model.layers.15.self_attn.o_proj.weight": "model.safetensors",
212
- "model.layers.15.self_attn.q_proj.bias": "model.safetensors",
213
- "model.layers.15.self_attn.q_proj.biases": "model.safetensors",
214
- "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
215
- "model.layers.15.self_attn.q_proj.weight": "model.safetensors",
216
- "model.layers.15.self_attn.v_proj.bias": "model.safetensors",
217
- "model.layers.15.self_attn.v_proj.biases": "model.safetensors",
218
- "model.layers.15.self_attn.v_proj.scales": "model.safetensors",
219
- "model.layers.15.self_attn.v_proj.weight": "model.safetensors",
220
- "model.layers.16.input_layernorm.weight": "model.safetensors",
221
- "model.layers.16.mlp.down_proj.biases": "model.safetensors",
222
- "model.layers.16.mlp.down_proj.scales": "model.safetensors",
223
- "model.layers.16.mlp.down_proj.weight": "model.safetensors",
224
- "model.layers.16.mlp.gate_proj.biases": "model.safetensors",
225
- "model.layers.16.mlp.gate_proj.scales": "model.safetensors",
226
- "model.layers.16.mlp.gate_proj.weight": "model.safetensors",
227
- "model.layers.16.mlp.up_proj.biases": "model.safetensors",
228
- "model.layers.16.mlp.up_proj.scales": "model.safetensors",
229
- "model.layers.16.mlp.up_proj.weight": "model.safetensors",
230
- "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
231
- "model.layers.16.self_attn.k_proj.bias": "model.safetensors",
232
- "model.layers.16.self_attn.k_proj.biases": "model.safetensors",
233
- "model.layers.16.self_attn.k_proj.scales": "model.safetensors",
234
- "model.layers.16.self_attn.k_proj.weight": "model.safetensors",
235
- "model.layers.16.self_attn.o_proj.biases": "model.safetensors",
236
- "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
237
- "model.layers.16.self_attn.o_proj.weight": "model.safetensors",
238
- "model.layers.16.self_attn.q_proj.bias": "model.safetensors",
239
- "model.layers.16.self_attn.q_proj.biases": "model.safetensors",
240
- "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
241
- "model.layers.16.self_attn.q_proj.weight": "model.safetensors",
242
- "model.layers.16.self_attn.v_proj.bias": "model.safetensors",
243
- "model.layers.16.self_attn.v_proj.biases": "model.safetensors",
244
- "model.layers.16.self_attn.v_proj.scales": "model.safetensors",
245
- "model.layers.16.self_attn.v_proj.weight": "model.safetensors",
246
- "model.layers.17.input_layernorm.weight": "model.safetensors",
247
- "model.layers.17.mlp.down_proj.biases": "model.safetensors",
248
- "model.layers.17.mlp.down_proj.scales": "model.safetensors",
249
- "model.layers.17.mlp.down_proj.weight": "model.safetensors",
250
- "model.layers.17.mlp.gate_proj.biases": "model.safetensors",
251
- "model.layers.17.mlp.gate_proj.scales": "model.safetensors",
252
- "model.layers.17.mlp.gate_proj.weight": "model.safetensors",
253
- "model.layers.17.mlp.up_proj.biases": "model.safetensors",
254
- "model.layers.17.mlp.up_proj.scales": "model.safetensors",
255
- "model.layers.17.mlp.up_proj.weight": "model.safetensors",
256
- "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
257
- "model.layers.17.self_attn.k_proj.bias": "model.safetensors",
258
- "model.layers.17.self_attn.k_proj.biases": "model.safetensors",
259
- "model.layers.17.self_attn.k_proj.scales": "model.safetensors",
260
- "model.layers.17.self_attn.k_proj.weight": "model.safetensors",
261
- "model.layers.17.self_attn.o_proj.biases": "model.safetensors",
262
- "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
263
- "model.layers.17.self_attn.o_proj.weight": "model.safetensors",
264
- "model.layers.17.self_attn.q_proj.bias": "model.safetensors",
265
- "model.layers.17.self_attn.q_proj.biases": "model.safetensors",
266
- "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
267
- "model.layers.17.self_attn.q_proj.weight": "model.safetensors",
268
- "model.layers.17.self_attn.v_proj.bias": "model.safetensors",
269
- "model.layers.17.self_attn.v_proj.biases": "model.safetensors",
270
- "model.layers.17.self_attn.v_proj.scales": "model.safetensors",
271
- "model.layers.17.self_attn.v_proj.weight": "model.safetensors",
272
- "model.layers.18.input_layernorm.weight": "model.safetensors",
273
- "model.layers.18.mlp.down_proj.biases": "model.safetensors",
274
- "model.layers.18.mlp.down_proj.scales": "model.safetensors",
275
- "model.layers.18.mlp.down_proj.weight": "model.safetensors",
276
- "model.layers.18.mlp.gate_proj.biases": "model.safetensors",
277
- "model.layers.18.mlp.gate_proj.scales": "model.safetensors",
278
- "model.layers.18.mlp.gate_proj.weight": "model.safetensors",
279
- "model.layers.18.mlp.up_proj.biases": "model.safetensors",
280
- "model.layers.18.mlp.up_proj.scales": "model.safetensors",
281
- "model.layers.18.mlp.up_proj.weight": "model.safetensors",
282
- "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
283
- "model.layers.18.self_attn.k_proj.bias": "model.safetensors",
284
- "model.layers.18.self_attn.k_proj.biases": "model.safetensors",
285
- "model.layers.18.self_attn.k_proj.scales": "model.safetensors",
286
- "model.layers.18.self_attn.k_proj.weight": "model.safetensors",
287
- "model.layers.18.self_attn.o_proj.biases": "model.safetensors",
288
- "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
289
- "model.layers.18.self_attn.o_proj.weight": "model.safetensors",
290
- "model.layers.18.self_attn.q_proj.bias": "model.safetensors",
291
- "model.layers.18.self_attn.q_proj.biases": "model.safetensors",
292
- "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
293
- "model.layers.18.self_attn.q_proj.weight": "model.safetensors",
294
- "model.layers.18.self_attn.v_proj.bias": "model.safetensors",
295
- "model.layers.18.self_attn.v_proj.biases": "model.safetensors",
296
- "model.layers.18.self_attn.v_proj.scales": "model.safetensors",
297
- "model.layers.18.self_attn.v_proj.weight": "model.safetensors",
298
- "model.layers.19.input_layernorm.weight": "model.safetensors",
299
- "model.layers.19.mlp.down_proj.biases": "model.safetensors",
300
- "model.layers.19.mlp.down_proj.scales": "model.safetensors",
301
- "model.layers.19.mlp.down_proj.weight": "model.safetensors",
302
- "model.layers.19.mlp.gate_proj.biases": "model.safetensors",
303
- "model.layers.19.mlp.gate_proj.scales": "model.safetensors",
304
- "model.layers.19.mlp.gate_proj.weight": "model.safetensors",
305
- "model.layers.19.mlp.up_proj.biases": "model.safetensors",
306
- "model.layers.19.mlp.up_proj.scales": "model.safetensors",
307
- "model.layers.19.mlp.up_proj.weight": "model.safetensors",
308
- "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
309
- "model.layers.19.self_attn.k_proj.bias": "model.safetensors",
310
- "model.layers.19.self_attn.k_proj.biases": "model.safetensors",
311
- "model.layers.19.self_attn.k_proj.scales": "model.safetensors",
312
- "model.layers.19.self_attn.k_proj.weight": "model.safetensors",
313
- "model.layers.19.self_attn.o_proj.biases": "model.safetensors",
314
- "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
315
- "model.layers.19.self_attn.o_proj.weight": "model.safetensors",
316
- "model.layers.19.self_attn.q_proj.bias": "model.safetensors",
317
- "model.layers.19.self_attn.q_proj.biases": "model.safetensors",
318
- "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
319
- "model.layers.19.self_attn.q_proj.weight": "model.safetensors",
320
- "model.layers.19.self_attn.v_proj.bias": "model.safetensors",
321
- "model.layers.19.self_attn.v_proj.biases": "model.safetensors",
322
- "model.layers.19.self_attn.v_proj.scales": "model.safetensors",
323
- "model.layers.19.self_attn.v_proj.weight": "model.safetensors",
324
- "model.layers.2.input_layernorm.weight": "model.safetensors",
325
- "model.layers.2.mlp.down_proj.biases": "model.safetensors",
326
- "model.layers.2.mlp.down_proj.scales": "model.safetensors",
327
- "model.layers.2.mlp.down_proj.weight": "model.safetensors",
328
- "model.layers.2.mlp.gate_proj.biases": "model.safetensors",
329
- "model.layers.2.mlp.gate_proj.scales": "model.safetensors",
330
- "model.layers.2.mlp.gate_proj.weight": "model.safetensors",
331
- "model.layers.2.mlp.up_proj.biases": "model.safetensors",
332
- "model.layers.2.mlp.up_proj.scales": "model.safetensors",
333
- "model.layers.2.mlp.up_proj.weight": "model.safetensors",
334
- "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
335
- "model.layers.2.self_attn.k_proj.bias": "model.safetensors",
336
- "model.layers.2.self_attn.k_proj.biases": "model.safetensors",
337
- "model.layers.2.self_attn.k_proj.scales": "model.safetensors",
338
- "model.layers.2.self_attn.k_proj.weight": "model.safetensors",
339
- "model.layers.2.self_attn.o_proj.biases": "model.safetensors",
340
- "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
341
- "model.layers.2.self_attn.o_proj.weight": "model.safetensors",
342
- "model.layers.2.self_attn.q_proj.bias": "model.safetensors",
343
- "model.layers.2.self_attn.q_proj.biases": "model.safetensors",
344
- "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
345
- "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
346
- "model.layers.2.self_attn.v_proj.bias": "model.safetensors",
347
- "model.layers.2.self_attn.v_proj.biases": "model.safetensors",
348
- "model.layers.2.self_attn.v_proj.scales": "model.safetensors",
349
- "model.layers.2.self_attn.v_proj.weight": "model.safetensors",
350
- "model.layers.20.input_layernorm.weight": "model.safetensors",
351
- "model.layers.20.mlp.down_proj.biases": "model.safetensors",
352
- "model.layers.20.mlp.down_proj.scales": "model.safetensors",
353
- "model.layers.20.mlp.down_proj.weight": "model.safetensors",
354
- "model.layers.20.mlp.gate_proj.biases": "model.safetensors",
355
- "model.layers.20.mlp.gate_proj.scales": "model.safetensors",
356
- "model.layers.20.mlp.gate_proj.weight": "model.safetensors",
357
- "model.layers.20.mlp.up_proj.biases": "model.safetensors",
358
- "model.layers.20.mlp.up_proj.scales": "model.safetensors",
359
- "model.layers.20.mlp.up_proj.weight": "model.safetensors",
360
- "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
361
- "model.layers.20.self_attn.k_proj.bias": "model.safetensors",
362
- "model.layers.20.self_attn.k_proj.biases": "model.safetensors",
363
- "model.layers.20.self_attn.k_proj.scales": "model.safetensors",
364
- "model.layers.20.self_attn.k_proj.weight": "model.safetensors",
365
- "model.layers.20.self_attn.o_proj.biases": "model.safetensors",
366
- "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
367
- "model.layers.20.self_attn.o_proj.weight": "model.safetensors",
368
- "model.layers.20.self_attn.q_proj.bias": "model.safetensors",
369
- "model.layers.20.self_attn.q_proj.biases": "model.safetensors",
370
- "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
371
- "model.layers.20.self_attn.q_proj.weight": "model.safetensors",
372
- "model.layers.20.self_attn.v_proj.bias": "model.safetensors",
373
- "model.layers.20.self_attn.v_proj.biases": "model.safetensors",
374
- "model.layers.20.self_attn.v_proj.scales": "model.safetensors",
375
- "model.layers.20.self_attn.v_proj.weight": "model.safetensors",
376
- "model.layers.21.input_layernorm.weight": "model.safetensors",
377
- "model.layers.21.mlp.down_proj.biases": "model.safetensors",
378
- "model.layers.21.mlp.down_proj.scales": "model.safetensors",
379
- "model.layers.21.mlp.down_proj.weight": "model.safetensors",
380
- "model.layers.21.mlp.gate_proj.biases": "model.safetensors",
381
- "model.layers.21.mlp.gate_proj.scales": "model.safetensors",
382
- "model.layers.21.mlp.gate_proj.weight": "model.safetensors",
383
- "model.layers.21.mlp.up_proj.biases": "model.safetensors",
384
- "model.layers.21.mlp.up_proj.scales": "model.safetensors",
385
- "model.layers.21.mlp.up_proj.weight": "model.safetensors",
386
- "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
387
- "model.layers.21.self_attn.k_proj.bias": "model.safetensors",
388
- "model.layers.21.self_attn.k_proj.biases": "model.safetensors",
389
- "model.layers.21.self_attn.k_proj.scales": "model.safetensors",
390
- "model.layers.21.self_attn.k_proj.weight": "model.safetensors",
391
- "model.layers.21.self_attn.o_proj.biases": "model.safetensors",
392
- "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
393
- "model.layers.21.self_attn.o_proj.weight": "model.safetensors",
394
- "model.layers.21.self_attn.q_proj.bias": "model.safetensors",
395
- "model.layers.21.self_attn.q_proj.biases": "model.safetensors",
396
- "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
397
- "model.layers.21.self_attn.q_proj.weight": "model.safetensors",
398
- "model.layers.21.self_attn.v_proj.bias": "model.safetensors",
399
- "model.layers.21.self_attn.v_proj.biases": "model.safetensors",
400
- "model.layers.21.self_attn.v_proj.scales": "model.safetensors",
401
- "model.layers.21.self_attn.v_proj.weight": "model.safetensors",
402
- "model.layers.22.input_layernorm.weight": "model.safetensors",
403
- "model.layers.22.mlp.down_proj.biases": "model.safetensors",
404
- "model.layers.22.mlp.down_proj.scales": "model.safetensors",
405
- "model.layers.22.mlp.down_proj.weight": "model.safetensors",
406
- "model.layers.22.mlp.gate_proj.biases": "model.safetensors",
407
- "model.layers.22.mlp.gate_proj.scales": "model.safetensors",
408
- "model.layers.22.mlp.gate_proj.weight": "model.safetensors",
409
- "model.layers.22.mlp.up_proj.biases": "model.safetensors",
410
- "model.layers.22.mlp.up_proj.scales": "model.safetensors",
411
- "model.layers.22.mlp.up_proj.weight": "model.safetensors",
412
- "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
413
- "model.layers.22.self_attn.k_proj.bias": "model.safetensors",
414
- "model.layers.22.self_attn.k_proj.biases": "model.safetensors",
415
- "model.layers.22.self_attn.k_proj.scales": "model.safetensors",
416
- "model.layers.22.self_attn.k_proj.weight": "model.safetensors",
417
- "model.layers.22.self_attn.o_proj.biases": "model.safetensors",
418
- "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
419
- "model.layers.22.self_attn.o_proj.weight": "model.safetensors",
420
- "model.layers.22.self_attn.q_proj.bias": "model.safetensors",
421
- "model.layers.22.self_attn.q_proj.biases": "model.safetensors",
422
- "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
423
- "model.layers.22.self_attn.q_proj.weight": "model.safetensors",
424
- "model.layers.22.self_attn.v_proj.bias": "model.safetensors",
425
- "model.layers.22.self_attn.v_proj.biases": "model.safetensors",
426
- "model.layers.22.self_attn.v_proj.scales": "model.safetensors",
427
- "model.layers.22.self_attn.v_proj.weight": "model.safetensors",
428
- "model.layers.23.input_layernorm.weight": "model.safetensors",
429
- "model.layers.23.mlp.down_proj.biases": "model.safetensors",
430
- "model.layers.23.mlp.down_proj.scales": "model.safetensors",
431
- "model.layers.23.mlp.down_proj.weight": "model.safetensors",
432
- "model.layers.23.mlp.gate_proj.biases": "model.safetensors",
433
- "model.layers.23.mlp.gate_proj.scales": "model.safetensors",
434
- "model.layers.23.mlp.gate_proj.weight": "model.safetensors",
435
- "model.layers.23.mlp.up_proj.biases": "model.safetensors",
436
- "model.layers.23.mlp.up_proj.scales": "model.safetensors",
437
- "model.layers.23.mlp.up_proj.weight": "model.safetensors",
438
- "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
439
- "model.layers.23.self_attn.k_proj.bias": "model.safetensors",
440
- "model.layers.23.self_attn.k_proj.biases": "model.safetensors",
441
- "model.layers.23.self_attn.k_proj.scales": "model.safetensors",
442
- "model.layers.23.self_attn.k_proj.weight": "model.safetensors",
443
- "model.layers.23.self_attn.o_proj.biases": "model.safetensors",
444
- "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
445
- "model.layers.23.self_attn.o_proj.weight": "model.safetensors",
446
- "model.layers.23.self_attn.q_proj.bias": "model.safetensors",
447
- "model.layers.23.self_attn.q_proj.biases": "model.safetensors",
448
- "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
449
- "model.layers.23.self_attn.q_proj.weight": "model.safetensors",
450
- "model.layers.23.self_attn.v_proj.bias": "model.safetensors",
451
- "model.layers.23.self_attn.v_proj.biases": "model.safetensors",
452
- "model.layers.23.self_attn.v_proj.scales": "model.safetensors",
453
- "model.layers.23.self_attn.v_proj.weight": "model.safetensors",
454
- "model.layers.24.input_layernorm.weight": "model.safetensors",
455
- "model.layers.24.mlp.down_proj.biases": "model.safetensors",
456
- "model.layers.24.mlp.down_proj.scales": "model.safetensors",
457
- "model.layers.24.mlp.down_proj.weight": "model.safetensors",
458
- "model.layers.24.mlp.gate_proj.biases": "model.safetensors",
459
- "model.layers.24.mlp.gate_proj.scales": "model.safetensors",
460
- "model.layers.24.mlp.gate_proj.weight": "model.safetensors",
461
- "model.layers.24.mlp.up_proj.biases": "model.safetensors",
462
- "model.layers.24.mlp.up_proj.scales": "model.safetensors",
463
- "model.layers.24.mlp.up_proj.weight": "model.safetensors",
464
- "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
465
- "model.layers.24.self_attn.k_proj.bias": "model.safetensors",
466
- "model.layers.24.self_attn.k_proj.biases": "model.safetensors",
467
- "model.layers.24.self_attn.k_proj.scales": "model.safetensors",
468
- "model.layers.24.self_attn.k_proj.weight": "model.safetensors",
469
- "model.layers.24.self_attn.o_proj.biases": "model.safetensors",
470
- "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
471
- "model.layers.24.self_attn.o_proj.weight": "model.safetensors",
472
- "model.layers.24.self_attn.q_proj.bias": "model.safetensors",
473
- "model.layers.24.self_attn.q_proj.biases": "model.safetensors",
474
- "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
475
- "model.layers.24.self_attn.q_proj.weight": "model.safetensors",
476
- "model.layers.24.self_attn.v_proj.bias": "model.safetensors",
477
- "model.layers.24.self_attn.v_proj.biases": "model.safetensors",
478
- "model.layers.24.self_attn.v_proj.scales": "model.safetensors",
479
- "model.layers.24.self_attn.v_proj.weight": "model.safetensors",
480
- "model.layers.25.input_layernorm.weight": "model.safetensors",
481
- "model.layers.25.mlp.down_proj.biases": "model.safetensors",
482
- "model.layers.25.mlp.down_proj.scales": "model.safetensors",
483
- "model.layers.25.mlp.down_proj.weight": "model.safetensors",
484
- "model.layers.25.mlp.gate_proj.biases": "model.safetensors",
485
- "model.layers.25.mlp.gate_proj.scales": "model.safetensors",
486
- "model.layers.25.mlp.gate_proj.weight": "model.safetensors",
487
- "model.layers.25.mlp.up_proj.biases": "model.safetensors",
488
- "model.layers.25.mlp.up_proj.scales": "model.safetensors",
489
- "model.layers.25.mlp.up_proj.weight": "model.safetensors",
490
- "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
491
- "model.layers.25.self_attn.k_proj.bias": "model.safetensors",
492
- "model.layers.25.self_attn.k_proj.biases": "model.safetensors",
493
- "model.layers.25.self_attn.k_proj.scales": "model.safetensors",
494
- "model.layers.25.self_attn.k_proj.weight": "model.safetensors",
495
- "model.layers.25.self_attn.o_proj.biases": "model.safetensors",
496
- "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
497
- "model.layers.25.self_attn.o_proj.weight": "model.safetensors",
498
- "model.layers.25.self_attn.q_proj.bias": "model.safetensors",
499
- "model.layers.25.self_attn.q_proj.biases": "model.safetensors",
500
- "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
501
- "model.layers.25.self_attn.q_proj.weight": "model.safetensors",
502
- "model.layers.25.self_attn.v_proj.bias": "model.safetensors",
503
- "model.layers.25.self_attn.v_proj.biases": "model.safetensors",
504
- "model.layers.25.self_attn.v_proj.scales": "model.safetensors",
505
- "model.layers.25.self_attn.v_proj.weight": "model.safetensors",
506
- "model.layers.26.input_layernorm.weight": "model.safetensors",
507
- "model.layers.26.mlp.down_proj.biases": "model.safetensors",
508
- "model.layers.26.mlp.down_proj.scales": "model.safetensors",
509
- "model.layers.26.mlp.down_proj.weight": "model.safetensors",
510
- "model.layers.26.mlp.gate_proj.biases": "model.safetensors",
511
- "model.layers.26.mlp.gate_proj.scales": "model.safetensors",
512
- "model.layers.26.mlp.gate_proj.weight": "model.safetensors",
513
- "model.layers.26.mlp.up_proj.biases": "model.safetensors",
514
- "model.layers.26.mlp.up_proj.scales": "model.safetensors",
515
- "model.layers.26.mlp.up_proj.weight": "model.safetensors",
516
- "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
517
- "model.layers.26.self_attn.k_proj.bias": "model.safetensors",
518
- "model.layers.26.self_attn.k_proj.biases": "model.safetensors",
519
- "model.layers.26.self_attn.k_proj.scales": "model.safetensors",
520
- "model.layers.26.self_attn.k_proj.weight": "model.safetensors",
521
- "model.layers.26.self_attn.o_proj.biases": "model.safetensors",
522
- "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
523
- "model.layers.26.self_attn.o_proj.weight": "model.safetensors",
524
- "model.layers.26.self_attn.q_proj.bias": "model.safetensors",
525
- "model.layers.26.self_attn.q_proj.biases": "model.safetensors",
526
- "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
527
- "model.layers.26.self_attn.q_proj.weight": "model.safetensors",
528
- "model.layers.26.self_attn.v_proj.bias": "model.safetensors",
529
- "model.layers.26.self_attn.v_proj.biases": "model.safetensors",
530
- "model.layers.26.self_attn.v_proj.scales": "model.safetensors",
531
- "model.layers.26.self_attn.v_proj.weight": "model.safetensors",
532
- "model.layers.27.input_layernorm.weight": "model.safetensors",
533
- "model.layers.27.mlp.down_proj.biases": "model.safetensors",
534
- "model.layers.27.mlp.down_proj.scales": "model.safetensors",
535
- "model.layers.27.mlp.down_proj.weight": "model.safetensors",
536
- "model.layers.27.mlp.gate_proj.biases": "model.safetensors",
537
- "model.layers.27.mlp.gate_proj.scales": "model.safetensors",
538
- "model.layers.27.mlp.gate_proj.weight": "model.safetensors",
539
- "model.layers.27.mlp.up_proj.biases": "model.safetensors",
540
- "model.layers.27.mlp.up_proj.scales": "model.safetensors",
541
- "model.layers.27.mlp.up_proj.weight": "model.safetensors",
542
- "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
543
- "model.layers.27.self_attn.k_proj.bias": "model.safetensors",
544
- "model.layers.27.self_attn.k_proj.biases": "model.safetensors",
545
- "model.layers.27.self_attn.k_proj.scales": "model.safetensors",
546
- "model.layers.27.self_attn.k_proj.weight": "model.safetensors",
547
- "model.layers.27.self_attn.o_proj.biases": "model.safetensors",
548
- "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
549
- "model.layers.27.self_attn.o_proj.weight": "model.safetensors",
550
- "model.layers.27.self_attn.q_proj.bias": "model.safetensors",
551
- "model.layers.27.self_attn.q_proj.biases": "model.safetensors",
552
- "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
553
- "model.layers.27.self_attn.q_proj.weight": "model.safetensors",
554
- "model.layers.27.self_attn.v_proj.bias": "model.safetensors",
555
- "model.layers.27.self_attn.v_proj.biases": "model.safetensors",
556
- "model.layers.27.self_attn.v_proj.scales": "model.safetensors",
557
- "model.layers.27.self_attn.v_proj.weight": "model.safetensors",
558
- "model.layers.3.input_layernorm.weight": "model.safetensors",
559
- "model.layers.3.mlp.down_proj.biases": "model.safetensors",
560
- "model.layers.3.mlp.down_proj.scales": "model.safetensors",
561
- "model.layers.3.mlp.down_proj.weight": "model.safetensors",
562
- "model.layers.3.mlp.gate_proj.biases": "model.safetensors",
563
- "model.layers.3.mlp.gate_proj.scales": "model.safetensors",
564
- "model.layers.3.mlp.gate_proj.weight": "model.safetensors",
565
- "model.layers.3.mlp.up_proj.biases": "model.safetensors",
566
- "model.layers.3.mlp.up_proj.scales": "model.safetensors",
567
- "model.layers.3.mlp.up_proj.weight": "model.safetensors",
568
- "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
569
- "model.layers.3.self_attn.k_proj.bias": "model.safetensors",
570
- "model.layers.3.self_attn.k_proj.biases": "model.safetensors",
571
- "model.layers.3.self_attn.k_proj.scales": "model.safetensors",
572
- "model.layers.3.self_attn.k_proj.weight": "model.safetensors",
573
- "model.layers.3.self_attn.o_proj.biases": "model.safetensors",
574
- "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
575
- "model.layers.3.self_attn.o_proj.weight": "model.safetensors",
576
- "model.layers.3.self_attn.q_proj.bias": "model.safetensors",
577
- "model.layers.3.self_attn.q_proj.biases": "model.safetensors",
578
- "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
579
- "model.layers.3.self_attn.q_proj.weight": "model.safetensors",
580
- "model.layers.3.self_attn.v_proj.bias": "model.safetensors",
581
- "model.layers.3.self_attn.v_proj.biases": "model.safetensors",
582
- "model.layers.3.self_attn.v_proj.scales": "model.safetensors",
583
- "model.layers.3.self_attn.v_proj.weight": "model.safetensors",
584
- "model.layers.4.input_layernorm.weight": "model.safetensors",
585
- "model.layers.4.mlp.down_proj.biases": "model.safetensors",
586
- "model.layers.4.mlp.down_proj.scales": "model.safetensors",
587
- "model.layers.4.mlp.down_proj.weight": "model.safetensors",
588
- "model.layers.4.mlp.gate_proj.biases": "model.safetensors",
589
- "model.layers.4.mlp.gate_proj.scales": "model.safetensors",
590
- "model.layers.4.mlp.gate_proj.weight": "model.safetensors",
591
- "model.layers.4.mlp.up_proj.biases": "model.safetensors",
592
- "model.layers.4.mlp.up_proj.scales": "model.safetensors",
593
- "model.layers.4.mlp.up_proj.weight": "model.safetensors",
594
- "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
595
- "model.layers.4.self_attn.k_proj.bias": "model.safetensors",
596
- "model.layers.4.self_attn.k_proj.biases": "model.safetensors",
597
- "model.layers.4.self_attn.k_proj.scales": "model.safetensors",
598
- "model.layers.4.self_attn.k_proj.weight": "model.safetensors",
599
- "model.layers.4.self_attn.o_proj.biases": "model.safetensors",
600
- "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
601
- "model.layers.4.self_attn.o_proj.weight": "model.safetensors",
602
- "model.layers.4.self_attn.q_proj.bias": "model.safetensors",
603
- "model.layers.4.self_attn.q_proj.biases": "model.safetensors",
604
- "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
605
- "model.layers.4.self_attn.q_proj.weight": "model.safetensors",
606
- "model.layers.4.self_attn.v_proj.bias": "model.safetensors",
607
- "model.layers.4.self_attn.v_proj.biases": "model.safetensors",
608
- "model.layers.4.self_attn.v_proj.scales": "model.safetensors",
609
- "model.layers.4.self_attn.v_proj.weight": "model.safetensors",
610
- "model.layers.5.input_layernorm.weight": "model.safetensors",
611
- "model.layers.5.mlp.down_proj.biases": "model.safetensors",
612
- "model.layers.5.mlp.down_proj.scales": "model.safetensors",
613
- "model.layers.5.mlp.down_proj.weight": "model.safetensors",
614
- "model.layers.5.mlp.gate_proj.biases": "model.safetensors",
615
- "model.layers.5.mlp.gate_proj.scales": "model.safetensors",
616
- "model.layers.5.mlp.gate_proj.weight": "model.safetensors",
617
- "model.layers.5.mlp.up_proj.biases": "model.safetensors",
618
- "model.layers.5.mlp.up_proj.scales": "model.safetensors",
619
- "model.layers.5.mlp.up_proj.weight": "model.safetensors",
620
- "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
621
- "model.layers.5.self_attn.k_proj.bias": "model.safetensors",
622
- "model.layers.5.self_attn.k_proj.biases": "model.safetensors",
623
- "model.layers.5.self_attn.k_proj.scales": "model.safetensors",
624
- "model.layers.5.self_attn.k_proj.weight": "model.safetensors",
625
- "model.layers.5.self_attn.o_proj.biases": "model.safetensors",
626
- "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
627
- "model.layers.5.self_attn.o_proj.weight": "model.safetensors",
628
- "model.layers.5.self_attn.q_proj.bias": "model.safetensors",
629
- "model.layers.5.self_attn.q_proj.biases": "model.safetensors",
630
- "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
631
- "model.layers.5.self_attn.q_proj.weight": "model.safetensors",
632
- "model.layers.5.self_attn.v_proj.bias": "model.safetensors",
633
- "model.layers.5.self_attn.v_proj.biases": "model.safetensors",
634
- "model.layers.5.self_attn.v_proj.scales": "model.safetensors",
635
- "model.layers.5.self_attn.v_proj.weight": "model.safetensors",
636
- "model.layers.6.input_layernorm.weight": "model.safetensors",
637
- "model.layers.6.mlp.down_proj.biases": "model.safetensors",
638
- "model.layers.6.mlp.down_proj.scales": "model.safetensors",
639
- "model.layers.6.mlp.down_proj.weight": "model.safetensors",
640
- "model.layers.6.mlp.gate_proj.biases": "model.safetensors",
641
- "model.layers.6.mlp.gate_proj.scales": "model.safetensors",
642
- "model.layers.6.mlp.gate_proj.weight": "model.safetensors",
643
- "model.layers.6.mlp.up_proj.biases": "model.safetensors",
644
- "model.layers.6.mlp.up_proj.scales": "model.safetensors",
645
- "model.layers.6.mlp.up_proj.weight": "model.safetensors",
646
- "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
647
- "model.layers.6.self_attn.k_proj.bias": "model.safetensors",
648
- "model.layers.6.self_attn.k_proj.biases": "model.safetensors",
649
- "model.layers.6.self_attn.k_proj.scales": "model.safetensors",
650
- "model.layers.6.self_attn.k_proj.weight": "model.safetensors",
651
- "model.layers.6.self_attn.o_proj.biases": "model.safetensors",
652
- "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
653
- "model.layers.6.self_attn.o_proj.weight": "model.safetensors",
654
- "model.layers.6.self_attn.q_proj.bias": "model.safetensors",
655
- "model.layers.6.self_attn.q_proj.biases": "model.safetensors",
656
- "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
657
- "model.layers.6.self_attn.q_proj.weight": "model.safetensors",
658
- "model.layers.6.self_attn.v_proj.bias": "model.safetensors",
659
- "model.layers.6.self_attn.v_proj.biases": "model.safetensors",
660
- "model.layers.6.self_attn.v_proj.scales": "model.safetensors",
661
- "model.layers.6.self_attn.v_proj.weight": "model.safetensors",
662
- "model.layers.7.input_layernorm.weight": "model.safetensors",
663
- "model.layers.7.mlp.down_proj.biases": "model.safetensors",
664
- "model.layers.7.mlp.down_proj.scales": "model.safetensors",
665
- "model.layers.7.mlp.down_proj.weight": "model.safetensors",
666
- "model.layers.7.mlp.gate_proj.biases": "model.safetensors",
667
- "model.layers.7.mlp.gate_proj.scales": "model.safetensors",
668
- "model.layers.7.mlp.gate_proj.weight": "model.safetensors",
669
- "model.layers.7.mlp.up_proj.biases": "model.safetensors",
670
- "model.layers.7.mlp.up_proj.scales": "model.safetensors",
671
- "model.layers.7.mlp.up_proj.weight": "model.safetensors",
672
- "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
673
- "model.layers.7.self_attn.k_proj.bias": "model.safetensors",
674
- "model.layers.7.self_attn.k_proj.biases": "model.safetensors",
675
- "model.layers.7.self_attn.k_proj.scales": "model.safetensors",
676
- "model.layers.7.self_attn.k_proj.weight": "model.safetensors",
677
- "model.layers.7.self_attn.o_proj.biases": "model.safetensors",
678
- "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
679
- "model.layers.7.self_attn.o_proj.weight": "model.safetensors",
680
- "model.layers.7.self_attn.q_proj.bias": "model.safetensors",
681
- "model.layers.7.self_attn.q_proj.biases": "model.safetensors",
682
- "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
683
- "model.layers.7.self_attn.q_proj.weight": "model.safetensors",
684
- "model.layers.7.self_attn.v_proj.bias": "model.safetensors",
685
- "model.layers.7.self_attn.v_proj.biases": "model.safetensors",
686
- "model.layers.7.self_attn.v_proj.scales": "model.safetensors",
687
- "model.layers.7.self_attn.v_proj.weight": "model.safetensors",
688
- "model.layers.8.input_layernorm.weight": "model.safetensors",
689
- "model.layers.8.mlp.down_proj.biases": "model.safetensors",
690
- "model.layers.8.mlp.down_proj.scales": "model.safetensors",
691
- "model.layers.8.mlp.down_proj.weight": "model.safetensors",
692
- "model.layers.8.mlp.gate_proj.biases": "model.safetensors",
693
- "model.layers.8.mlp.gate_proj.scales": "model.safetensors",
694
- "model.layers.8.mlp.gate_proj.weight": "model.safetensors",
695
- "model.layers.8.mlp.up_proj.biases": "model.safetensors",
696
- "model.layers.8.mlp.up_proj.scales": "model.safetensors",
697
- "model.layers.8.mlp.up_proj.weight": "model.safetensors",
698
- "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
699
- "model.layers.8.self_attn.k_proj.bias": "model.safetensors",
700
- "model.layers.8.self_attn.k_proj.biases": "model.safetensors",
701
- "model.layers.8.self_attn.k_proj.scales": "model.safetensors",
702
- "model.layers.8.self_attn.k_proj.weight": "model.safetensors",
703
- "model.layers.8.self_attn.o_proj.biases": "model.safetensors",
704
- "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
705
- "model.layers.8.self_attn.o_proj.weight": "model.safetensors",
706
- "model.layers.8.self_attn.q_proj.bias": "model.safetensors",
707
- "model.layers.8.self_attn.q_proj.biases": "model.safetensors",
708
- "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
709
- "model.layers.8.self_attn.q_proj.weight": "model.safetensors",
710
- "model.layers.8.self_attn.v_proj.bias": "model.safetensors",
711
- "model.layers.8.self_attn.v_proj.biases": "model.safetensors",
712
- "model.layers.8.self_attn.v_proj.scales": "model.safetensors",
713
- "model.layers.8.self_attn.v_proj.weight": "model.safetensors",
714
- "model.layers.9.input_layernorm.weight": "model.safetensors",
715
- "model.layers.9.mlp.down_proj.biases": "model.safetensors",
716
- "model.layers.9.mlp.down_proj.scales": "model.safetensors",
717
- "model.layers.9.mlp.down_proj.weight": "model.safetensors",
718
- "model.layers.9.mlp.gate_proj.biases": "model.safetensors",
719
- "model.layers.9.mlp.gate_proj.scales": "model.safetensors",
720
- "model.layers.9.mlp.gate_proj.weight": "model.safetensors",
721
- "model.layers.9.mlp.up_proj.biases": "model.safetensors",
722
- "model.layers.9.mlp.up_proj.scales": "model.safetensors",
723
- "model.layers.9.mlp.up_proj.weight": "model.safetensors",
724
- "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
725
- "model.layers.9.self_attn.k_proj.bias": "model.safetensors",
726
- "model.layers.9.self_attn.k_proj.biases": "model.safetensors",
727
- "model.layers.9.self_attn.k_proj.scales": "model.safetensors",
728
- "model.layers.9.self_attn.k_proj.weight": "model.safetensors",
729
- "model.layers.9.self_attn.o_proj.biases": "model.safetensors",
730
- "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
731
- "model.layers.9.self_attn.o_proj.weight": "model.safetensors",
732
- "model.layers.9.self_attn.q_proj.bias": "model.safetensors",
733
- "model.layers.9.self_attn.q_proj.biases": "model.safetensors",
734
- "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
735
- "model.layers.9.self_attn.q_proj.weight": "model.safetensors",
736
- "model.layers.9.self_attn.v_proj.bias": "model.safetensors",
737
- "model.layers.9.self_attn.v_proj.biases": "model.safetensors",
738
- "model.layers.9.self_attn.v_proj.scales": "model.safetensors",
739
- "model.layers.9.self_attn.v_proj.weight": "model.safetensors",
740
- "model.norm.weight": "model.safetensors"
741
- }
742
- }
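For reference, the deleted model.safetensors.index.json files follow the standard safetensors index layout: a `weight_map` object mapping every tensor name to the shard file that stores it. The `.scales` and `.biases` entries that sit alongside each quantized `.weight` are the per-group quantization parameters MLX saves for quantized layers. Below is a minimal sketch of resolving a tensor to its shard, assuming a local copy of the index; the file path and tensor name are illustrative:

```python
import json

# Sketch: look up which safetensors shard stores a given tensor,
# using the weight_map from an index file like the one above.
def shard_for(tensor_name: str, index_path: str = "model.safetensors.index.json") -> str:
    with open(index_path) as f:
        index = json.load(f)
    return index["weight_map"][tensor_name]

# Single-file quants map every tensor to "model.safetensors"; the sharded
# 6-bit variant maps tensors across model-00001-of-00002.safetensors and its peer.
print(shard_for("model.layers.1.self_attn.q_proj.weight"))
```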
 
DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
- {
- "bos_token": {
- "content": "<|begin▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "eos_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "pad_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- }
- }
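This special_tokens_map.json pins BOS to `<|begin▁of▁sentence|>` and reuses `<|end▁of▁sentence|>` for both EOS and padding, as the DeepSeek-R1 distills do. A quick sketch of verifying that aliasing, assuming a local copy of the file:

```python
import json

# Sketch: confirm the pad token aliases the EOS token in this map.
with open("special_tokens_map.json") as f:
    tokens = json.load(f)

assert tokens["pad_token"]["content"] == tokens["eos_token"]["content"]
print(tokens["bos_token"]["content"])  # -> <|begin▁of▁sentence|>
```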
 
DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/tokenizer.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
- size 11422778
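Note that what is deleted here is a Git LFS pointer, not the tokenizer itself: the three lines record the pointer spec version, the SHA-256 object id, and the byte size of the real ~11 MB tokenizer.json payload. A small sketch of parsing such a pointer (the path is illustrative):

```python
# Sketch: parse a Git LFS pointer file (format per https://git-lfs.github.com/spec/v1).
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            # Each line is "<key> <value>", e.g. "oid sha256:e20d..."
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer("tokenizer.json")  # the pointer, not the full file
print(ptr["oid"], ptr["size"])  # sha256:e20ddafc... 11422778
```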
 
DeepSeek-R1-Distill-Qwen-7B-3,4_mixed/tokenizer_config.json DELETED
@@ -1,195 +0,0 @@
- {
- "add_bos_token": true,
- "add_eos_token": false,
- "add_prefix_space": null,
- "added_tokens_decoder": {
- "151643": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151644": {
- "content": "<|User|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151645": {
- "content": "<|Assistant|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151646": {
- "content": "<|begin▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151647": {
- "content": "<|EOT|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151648": {
- "content": "<think>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151649": {
- "content": "</think>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151650": {
- "content": "<|quad_start|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151651": {
- "content": "<|quad_end|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151652": {
- "content": "<|vision_start|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151653": {
- "content": "<|vision_end|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151654": {
- "content": "<|vision_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151655": {
- "content": "<|image_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151656": {
- "content": "<|video_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151657": {
- "content": "<tool_call>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151658": {
- "content": "</tool_call>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151659": {
- "content": "<|fim_prefix|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151660": {
- "content": "<|fim_middle|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151661": {
- "content": "<|fim_suffix|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151662": {
- "content": "<|fim_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151663": {
- "content": "<|repo_name|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151664": {
- "content": "<|file_sep|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- }
- },
- "bos_token": "<|begin▁of▁sentence|>",
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
- "clean_up_tokenization_spaces": false,
- "eos_token": "<|end▁of▁sentence|>",
- "extra_special_tokens": {},
- "legacy": true,
- "model_max_length": 16384,
- "pad_token": "<|end▁of▁sentence|>",
- "sp_model_kwargs": {},
- "tokenizer_class": "LlamaTokenizerFast",
- "unk_token": null,
- "use_default_system_prompt": false
- }
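The chat_template above encodes the DeepSeek-R1 conversation format: `<|User|>`/`<|Assistant|>` turn markers, tool-call and tool-output blocks, stripping of any `</think>` reasoning from earlier assistant turns, and an opening `<think>` tag when a generation prompt is requested. A sketch of rendering a prompt with it, assuming the transformers library and a local copy of one of these quant folders (the path is illustrative):

```python
from transformers import AutoTokenizer

# Sketch: render a prompt using the chat template shipped in tokenizer_config.json.
tok = AutoTokenizer.from_pretrained("DeepSeek-R1-Distill-Qwen-7B-3,4_mixed")
prompt = tok.apply_chat_template(
    [{"role": "user", "content": "What is 2 + 2?"}],
    tokenize=False,
    add_generation_prompt=True,
)
# With add_generation_prompt=True the rendered string ends in
# "<|Assistant|><think>\n", so the model begins its reply inside a reasoning block.
print(prompt)
```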
 
 
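The config.json diff below shows how MLX records mixed quantization: a global `group_size` of 64, a null global `bits`, and then an explicit per-module entry for every quantized projection, with rope and the layernorms left unquantized (`false`). The visible layers mix 3-bit and 4-bit projections (v_proj and down_proj get the extra bit in several early layers). A sketch for tallying the bit-widths recorded in such a config, assuming a local copy (the path is illustrative):

```python
import json
from collections import Counter

# Sketch: tally the per-module bit-widths in an MLX quantization config.
with open("DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/config.json") as f:
    cfg = json.load(f)

bits = Counter(
    entry["bits"]
    for entry in cfg["quantization"].values()
    # Skip the scalar group_size/bits keys and the `false` (unquantized) modules.
    if isinstance(entry, dict) and "bits" in entry
)
print(bits)  # e.g. Counter({3: ..., 4: ...}) depending on the mix
```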
DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/config.json DELETED
@@ -1,1790 +0,0 @@
- {
-   "architectures": ["Qwen2ForCausalLM"],
-   "attention_dropout": 0.0,
-   "bos_token_id": 151643,
-   "eos_token_id": 151643,
-   "hidden_act": "silu",
-   "hidden_size": 3584,
-   "initializer_range": 0.02,
-   "intermediate_size": 18944,
-   "max_position_embeddings": 131072,
-   "max_window_layers": 28,
-   "model_type": "qwen2",
-   "num_attention_heads": 28,
-   "num_hidden_layers": 28,
-   "num_key_value_heads": 4,
-   "quantization": {
-     "group_size": 64,
-     "bits": null,
-     "model.embed_tokens": {"group_size": 64, "bits": 3},
-     "model.layers.0.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.0.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.0.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.0.self_attn.rope": false,
-     "model.layers.0.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.0.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.0.input_layernorm": false,
-     "model.layers.0.post_attention_layernorm": false,
-     "model.layers.1.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.1.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.1.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.1.self_attn.rope": false,
-     "model.layers.1.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.1.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.1.input_layernorm": false,
-     "model.layers.1.post_attention_layernorm": false,
-     "model.layers.2.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.2.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.2.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.2.self_attn.rope": false,
-     "model.layers.2.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.2.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.2.input_layernorm": false,
-     "model.layers.2.post_attention_layernorm": false,
-     "model.layers.3.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.self_attn.rope": false,
-     "model.layers.3.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.input_layernorm": false,
-     "model.layers.3.post_attention_layernorm": false,
-     "model.layers.4.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.self_attn.rope": false,
-     "model.layers.4.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.input_layernorm": false,
-     "model.layers.4.post_attention_layernorm": false,
-     "model.layers.5.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.5.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.5.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.5.self_attn.rope": false,
-     "model.layers.5.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.5.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.5.input_layernorm": false,
-     "model.layers.5.post_attention_layernorm": false,
-     "model.layers.6.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.self_attn.rope": false,
-     "model.layers.6.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.input_layernorm": false,
-     "model.layers.6.post_attention_layernorm": false,
-     "model.layers.7.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.self_attn.rope": false,
-     "model.layers.7.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.input_layernorm": false,
-     "model.layers.7.post_attention_layernorm": false,
-     "model.layers.8.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.8.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.8.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.8.self_attn.rope": false,
-     "model.layers.8.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.8.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.8.input_layernorm": false,
-     "model.layers.8.post_attention_layernorm": false,
-     "model.layers.9.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.self_attn.rope": false,
-     "model.layers.9.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.input_layernorm": false,
-     "model.layers.9.post_attention_layernorm": false,
-     "model.layers.10.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.self_attn.rope": false,
-     "model.layers.10.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.input_layernorm": false,
-     "model.layers.10.post_attention_layernorm": false,
-     "model.layers.11.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.11.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.11.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.11.self_attn.rope": false,
-     "model.layers.11.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.11.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.11.input_layernorm": false,
-     "model.layers.11.post_attention_layernorm": false,
-     "model.layers.12.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.self_attn.rope": false,
-     "model.layers.12.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.input_layernorm": false,
-     "model.layers.12.post_attention_layernorm": false,
-     "model.layers.13.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.self_attn.rope": false,
-     "model.layers.13.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.input_layernorm": false,
-     "model.layers.13.post_attention_layernorm": false,
-     "model.layers.14.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.14.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.14.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.14.self_attn.rope": false,
-     "model.layers.14.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.14.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.14.input_layernorm": false,
-     "model.layers.14.post_attention_layernorm": false,
-     "model.layers.15.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.self_attn.rope": false,
-     "model.layers.15.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.input_layernorm": false,
-     "model.layers.15.post_attention_layernorm": false,
-     "model.layers.16.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.self_attn.rope": false,
-     "model.layers.16.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.input_layernorm": false,
-     "model.layers.16.post_attention_layernorm": false,
-     "model.layers.17.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.17.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.17.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.17.self_attn.rope": false,
-     "model.layers.17.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.17.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.17.input_layernorm": false,
-     "model.layers.17.post_attention_layernorm": false,
-     "model.layers.18.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.self_attn.rope": false,
-     "model.layers.18.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.input_layernorm": false,
-     "model.layers.18.post_attention_layernorm": false,
-     "model.layers.19.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.self_attn.rope": false,
-     "model.layers.19.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.input_layernorm": false,
-     "model.layers.19.post_attention_layernorm": false,
-     "model.layers.20.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.20.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.20.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.20.self_attn.rope": false,
-     "model.layers.20.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.20.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.20.input_layernorm": false,
-     "model.layers.20.post_attention_layernorm": false,
-     "model.layers.21.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.self_attn.rope": false,
-     "model.layers.21.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.input_layernorm": false,
-     "model.layers.21.post_attention_layernorm": false,
-     "model.layers.22.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.self_attn.rope": false,
-     "model.layers.22.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.input_layernorm": false,
-     "model.layers.22.post_attention_layernorm": false,
-     "model.layers.23.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.23.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.23.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.23.self_attn.rope": false,
-     "model.layers.23.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.23.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.23.input_layernorm": false,
-     "model.layers.23.post_attention_layernorm": false,
-     "model.layers.24.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.24.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.24.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.24.self_attn.rope": false,
-     "model.layers.24.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.24.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.24.input_layernorm": false,
-     "model.layers.24.post_attention_layernorm": false,
-     "model.layers.25.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.25.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.25.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.25.self_attn.rope": false,
-     "model.layers.25.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.25.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.25.input_layernorm": false,
-     "model.layers.25.post_attention_layernorm": false,
-     "model.layers.26.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.26.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.26.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.26.self_attn.rope": false,
-     "model.layers.26.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.26.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.26.input_layernorm": false,
-     "model.layers.26.post_attention_layernorm": false,
-     "model.layers.27.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.27.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.27.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.27.self_attn.rope": false,
-     "model.layers.27.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.27.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.27.input_layernorm": false,
-     "model.layers.27.post_attention_layernorm": false,
-     "model.norm": false,
-     "lm_head": {"group_size": 64, "bits": 4}
-   },
-   "quantization_config": {
-     "group_size": 64,
-     "bits": null,
-     "model.embed_tokens": {"group_size": 64, "bits": 3},
-     "model.layers.0.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.0.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.0.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.0.self_attn.rope": false,
-     "model.layers.0.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.0.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.0.input_layernorm": false,
-     "model.layers.0.post_attention_layernorm": false,
-     "model.layers.1.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.1.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.1.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.1.self_attn.rope": false,
-     "model.layers.1.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.1.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.1.input_layernorm": false,
-     "model.layers.1.post_attention_layernorm": false,
-     "model.layers.2.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.2.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.2.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.2.self_attn.rope": false,
-     "model.layers.2.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.2.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.2.input_layernorm": false,
-     "model.layers.2.post_attention_layernorm": false,
-     "model.layers.3.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.self_attn.rope": false,
-     "model.layers.3.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.3.input_layernorm": false,
-     "model.layers.3.post_attention_layernorm": false,
-     "model.layers.4.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.self_attn.rope": false,
-     "model.layers.4.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.4.input_layernorm": false,
-     "model.layers.4.post_attention_layernorm": false,
-     "model.layers.5.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.5.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.5.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.5.self_attn.rope": false,
-     "model.layers.5.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.5.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.5.input_layernorm": false,
-     "model.layers.5.post_attention_layernorm": false,
-     "model.layers.6.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.self_attn.rope": false,
-     "model.layers.6.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.6.input_layernorm": false,
-     "model.layers.6.post_attention_layernorm": false,
-     "model.layers.7.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.self_attn.rope": false,
-     "model.layers.7.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.7.input_layernorm": false,
-     "model.layers.7.post_attention_layernorm": false,
-     "model.layers.8.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.8.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.8.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.8.self_attn.rope": false,
-     "model.layers.8.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.8.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.8.input_layernorm": false,
-     "model.layers.8.post_attention_layernorm": false,
-     "model.layers.9.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.self_attn.rope": false,
-     "model.layers.9.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.9.input_layernorm": false,
-     "model.layers.9.post_attention_layernorm": false,
-     "model.layers.10.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.self_attn.rope": false,
-     "model.layers.10.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.10.input_layernorm": false,
-     "model.layers.10.post_attention_layernorm": false,
-     "model.layers.11.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.11.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.11.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.11.self_attn.rope": false,
-     "model.layers.11.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.11.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.11.input_layernorm": false,
-     "model.layers.11.post_attention_layernorm": false,
-     "model.layers.12.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.self_attn.rope": false,
-     "model.layers.12.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.12.input_layernorm": false,
-     "model.layers.12.post_attention_layernorm": false,
-     "model.layers.13.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.self_attn.rope": false,
-     "model.layers.13.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.13.input_layernorm": false,
-     "model.layers.13.post_attention_layernorm": false,
-     "model.layers.14.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.14.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.14.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.14.self_attn.rope": false,
-     "model.layers.14.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.14.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.14.input_layernorm": false,
-     "model.layers.14.post_attention_layernorm": false,
-     "model.layers.15.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.self_attn.rope": false,
-     "model.layers.15.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.15.input_layernorm": false,
-     "model.layers.15.post_attention_layernorm": false,
-     "model.layers.16.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.self_attn.rope": false,
-     "model.layers.16.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.16.input_layernorm": false,
-     "model.layers.16.post_attention_layernorm": false,
-     "model.layers.17.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.17.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.17.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.17.self_attn.rope": false,
-     "model.layers.17.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.17.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.17.input_layernorm": false,
-     "model.layers.17.post_attention_layernorm": false,
-     "model.layers.18.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.self_attn.rope": false,
-     "model.layers.18.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.18.input_layernorm": false,
-     "model.layers.18.post_attention_layernorm": false,
-     "model.layers.19.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.self_attn.rope": false,
-     "model.layers.19.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.19.input_layernorm": false,
-     "model.layers.19.post_attention_layernorm": false,
-     "model.layers.20.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.20.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.20.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.20.self_attn.rope": false,
-     "model.layers.20.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.20.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.20.input_layernorm": false,
-     "model.layers.20.post_attention_layernorm": false,
-     "model.layers.21.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.self_attn.rope": false,
-     "model.layers.21.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.21.input_layernorm": false,
-     "model.layers.21.post_attention_layernorm": false,
-     "model.layers.22.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.self_attn.v_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.self_attn.rope": false,
-     "model.layers.22.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.mlp.down_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.22.input_layernorm": false,
-     "model.layers.22.post_attention_layernorm": false,
-     "model.layers.23.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.23.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.23.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.23.self_attn.rope": false,
-     "model.layers.23.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.23.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.23.input_layernorm": false,
-     "model.layers.23.post_attention_layernorm": false,
-     "model.layers.24.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.24.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.24.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.24.self_attn.rope": false,
-     "model.layers.24.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.24.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.24.input_layernorm": false,
-     "model.layers.24.post_attention_layernorm": false,
-     "model.layers.25.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.25.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.25.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.25.self_attn.rope": false,
-     "model.layers.25.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.25.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.25.input_layernorm": false,
-     "model.layers.25.post_attention_layernorm": false,
-     "model.layers.26.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.26.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.26.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.26.self_attn.rope": false,
-     "model.layers.26.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.26.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.26.input_layernorm": false,
-     "model.layers.26.post_attention_layernorm": false,
-     "model.layers.27.self_attn.q_proj": {"group_size": 64, "bits": 3},
-     "model.layers.27.self_attn.k_proj": {"group_size": 64, "bits": 3},
-     "model.layers.27.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.self_attn.o_proj": {"group_size": 64, "bits": 3},
-     "model.layers.27.self_attn.rope": false,
-     "model.layers.27.mlp.gate_proj": {"group_size": 64, "bits": 3},
-     "model.layers.27.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.mlp.up_proj": {"group_size": 64, "bits": 3},
-     "model.layers.27.input_layernorm": false,
-     "model.layers.27.post_attention_layernorm": false,
-     "model.norm": false,
-     "lm_head": {"group_size": 64, "bits": 4}
-   },
-   "rms_norm_eps": 1e-06,
-   "rope_theta": 10000,
-   "sliding_window": 4096,
-   "tie_word_embeddings": false,
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.44.0",
-   "use_cache": true,
-   "use_mrope": false,
-   "use_sliding_window": false,
-   "vocab_size": 152064
- }
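
The recipe above (3-bit base with group size 64, `v_proj`/`down_proj` lifted to 4 bits on selected layers, `lm_head` at 4 bits, norms and RoPE left unquantized) is the kind of per-module config mlx-lm emits when quantizing with a per-layer predicate. A minimal sketch of how such a mix could be regenerated, assuming a recent mlx-lm whose `convert()` accepts a `quant_predicate` callable; the function name and layer set below are read off the deleted config, and this is not necessarily the exact script used for this repo:

```python
# Minimal sketch, not the exact script used for this repo.
# Assumes a recent mlx-lm where convert() accepts quant_predicate.
from mlx_lm import convert

# Layers whose v_proj/down_proj are kept at 4 bits in the config above.
HIGH_PRECISION_LAYERS = {0, 1, 2, 5, 8, 11, 14, 17, 20, 23, 24, 25, 26, 27}

def mixed_3_4(path, module, config):
    """Per-module quantization override mirroring the deleted config."""
    if path == "lm_head":
        return {"group_size": 64, "bits": 4}
    parts = path.split(".")
    if "layers" in parts:
        layer = int(parts[parts.index("layers") + 1])
        if layer in HIGH_PRECISION_LAYERS and path.endswith(
            ("self_attn.v_proj", "mlp.down_proj")
        ):
            return {"group_size": 64, "bits": 4}
    return {"group_size": 64, "bits": 3}  # 3-bit default, group size 64

convert(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    mlx_path="DeepSeek-R1-Distill-Qwen-7B-3,6_mixed",
    quantize=True,
    q_group_size=64,
    q_bits=3,
    quant_predicate=mixed_3_4,
)
```
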
DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c15fdbbbe929593ed3932fd81869fc4d933469b2db5330acf52a4b43d9975143
- size 3522588433
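
The three lines above are a Git LFS pointer, not the weights themselves: the repository records only the blob's SHA-256 (`oid`) and byte size, while the ~3.5 GB safetensors file lives in LFS storage. A downloaded copy can be checked against the pointer with the standard library alone; a minimal sketch, assuming the file sits at the repo-relative path:

```python
# Verify a downloaded model.safetensors against the LFS pointer above.
# Stdlib only; the local path is an assumption.
import hashlib
from pathlib import Path

path = Path("DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/model.safetensors")

sha = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert path.stat().st_size == 3522588433, "size mismatch"
assert sha.hexdigest() == (
    "c15fdbbbe929593ed3932fd81869fc4d933469b2db5330acf52a4b43d9975143"
), "oid mismatch"
print("pointer matches")
```
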
DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/model.safetensors.index.json DELETED
@@ -1,742 +0,0 @@
- {
-   "metadata": {
-     "total_size": 3522505728
-   },
-   "weight_map": {
-     "lm_head.biases": "model.safetensors",
-     "lm_head.scales": "model.safetensors",
-     "lm_head.weight": "model.safetensors",
-     "model.embed_tokens.biases": "model.safetensors",
-     "model.embed_tokens.scales": "model.safetensors",
-     "model.embed_tokens.weight": "model.safetensors",
-     "model.layers.0.input_layernorm.weight": "model.safetensors",
-     "model.layers.0.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.0.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.0.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.0.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.0.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.0.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.0.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.0.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.0.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.0.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.0.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.0.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.0.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.0.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.0.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.0.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.0.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.0.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.0.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.0.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.1.input_layernorm.weight": "model.safetensors",
-     "model.layers.1.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.1.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.1.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.1.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.1.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.1.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.1.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.1.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.1.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.1.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.1.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.1.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.1.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.1.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.1.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.1.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.1.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.1.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.1.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.1.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.1.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.1.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.10.input_layernorm.weight": "model.safetensors",
-     "model.layers.10.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.10.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.10.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.10.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.10.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.10.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.10.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.10.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.10.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.10.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.10.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.10.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.10.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.10.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.10.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.10.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.10.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.10.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.10.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.10.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.10.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.10.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.11.input_layernorm.weight": "model.safetensors",
-     "model.layers.11.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.11.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.11.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.11.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.11.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.11.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.11.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.11.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.11.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.11.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.11.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.11.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.11.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.11.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.11.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.11.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.11.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.11.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.11.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.11.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.11.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.11.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.12.input_layernorm.weight": "model.safetensors",
-     "model.layers.12.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.12.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.12.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.12.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.12.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.12.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.12.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.12.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.12.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.12.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.12.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.12.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.12.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.12.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.12.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.12.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.12.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.12.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.12.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.12.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.12.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.12.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.13.input_layernorm.weight": "model.safetensors",
-     "model.layers.13.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.13.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.13.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.13.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.13.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.13.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.13.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.13.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.13.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.13.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.13.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.13.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.13.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.13.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.13.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.13.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.13.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.13.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.13.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.13.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.13.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.13.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.14.input_layernorm.weight": "model.safetensors",
-     "model.layers.14.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.14.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.14.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.14.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.14.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.14.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.14.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.14.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.14.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.14.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.14.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.14.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.14.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.14.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.14.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.14.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.14.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.14.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.14.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.14.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.14.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.14.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.15.input_layernorm.weight": "model.safetensors",
-     "model.layers.15.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.15.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.15.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.15.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.15.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.15.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.15.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.15.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.15.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.15.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.15.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.15.self_attn.k_proj.scales": "model.safetensors",
208
- "model.layers.15.self_attn.k_proj.weight": "model.safetensors",
209
- "model.layers.15.self_attn.o_proj.biases": "model.safetensors",
210
- "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
211
- "model.layers.15.self_attn.o_proj.weight": "model.safetensors",
212
- "model.layers.15.self_attn.q_proj.bias": "model.safetensors",
213
- "model.layers.15.self_attn.q_proj.biases": "model.safetensors",
214
- "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
215
- "model.layers.15.self_attn.q_proj.weight": "model.safetensors",
216
- "model.layers.15.self_attn.v_proj.bias": "model.safetensors",
217
- "model.layers.15.self_attn.v_proj.biases": "model.safetensors",
218
- "model.layers.15.self_attn.v_proj.scales": "model.safetensors",
219
- "model.layers.15.self_attn.v_proj.weight": "model.safetensors",
220
- "model.layers.16.input_layernorm.weight": "model.safetensors",
221
- "model.layers.16.mlp.down_proj.biases": "model.safetensors",
222
- "model.layers.16.mlp.down_proj.scales": "model.safetensors",
223
- "model.layers.16.mlp.down_proj.weight": "model.safetensors",
224
- "model.layers.16.mlp.gate_proj.biases": "model.safetensors",
225
- "model.layers.16.mlp.gate_proj.scales": "model.safetensors",
226
- "model.layers.16.mlp.gate_proj.weight": "model.safetensors",
227
- "model.layers.16.mlp.up_proj.biases": "model.safetensors",
228
- "model.layers.16.mlp.up_proj.scales": "model.safetensors",
229
- "model.layers.16.mlp.up_proj.weight": "model.safetensors",
230
- "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
231
- "model.layers.16.self_attn.k_proj.bias": "model.safetensors",
232
- "model.layers.16.self_attn.k_proj.biases": "model.safetensors",
233
- "model.layers.16.self_attn.k_proj.scales": "model.safetensors",
234
- "model.layers.16.self_attn.k_proj.weight": "model.safetensors",
235
- "model.layers.16.self_attn.o_proj.biases": "model.safetensors",
236
- "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
237
- "model.layers.16.self_attn.o_proj.weight": "model.safetensors",
238
- "model.layers.16.self_attn.q_proj.bias": "model.safetensors",
239
- "model.layers.16.self_attn.q_proj.biases": "model.safetensors",
240
- "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
241
- "model.layers.16.self_attn.q_proj.weight": "model.safetensors",
242
- "model.layers.16.self_attn.v_proj.bias": "model.safetensors",
243
- "model.layers.16.self_attn.v_proj.biases": "model.safetensors",
244
- "model.layers.16.self_attn.v_proj.scales": "model.safetensors",
245
- "model.layers.16.self_attn.v_proj.weight": "model.safetensors",
246
- "model.layers.17.input_layernorm.weight": "model.safetensors",
247
- "model.layers.17.mlp.down_proj.biases": "model.safetensors",
248
- "model.layers.17.mlp.down_proj.scales": "model.safetensors",
249
- "model.layers.17.mlp.down_proj.weight": "model.safetensors",
250
- "model.layers.17.mlp.gate_proj.biases": "model.safetensors",
251
- "model.layers.17.mlp.gate_proj.scales": "model.safetensors",
252
- "model.layers.17.mlp.gate_proj.weight": "model.safetensors",
253
- "model.layers.17.mlp.up_proj.biases": "model.safetensors",
254
- "model.layers.17.mlp.up_proj.scales": "model.safetensors",
255
- "model.layers.17.mlp.up_proj.weight": "model.safetensors",
256
- "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
257
- "model.layers.17.self_attn.k_proj.bias": "model.safetensors",
258
- "model.layers.17.self_attn.k_proj.biases": "model.safetensors",
259
- "model.layers.17.self_attn.k_proj.scales": "model.safetensors",
260
- "model.layers.17.self_attn.k_proj.weight": "model.safetensors",
261
- "model.layers.17.self_attn.o_proj.biases": "model.safetensors",
262
- "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
263
- "model.layers.17.self_attn.o_proj.weight": "model.safetensors",
264
- "model.layers.17.self_attn.q_proj.bias": "model.safetensors",
265
- "model.layers.17.self_attn.q_proj.biases": "model.safetensors",
266
- "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
267
- "model.layers.17.self_attn.q_proj.weight": "model.safetensors",
268
- "model.layers.17.self_attn.v_proj.bias": "model.safetensors",
269
- "model.layers.17.self_attn.v_proj.biases": "model.safetensors",
270
- "model.layers.17.self_attn.v_proj.scales": "model.safetensors",
271
- "model.layers.17.self_attn.v_proj.weight": "model.safetensors",
272
- "model.layers.18.input_layernorm.weight": "model.safetensors",
273
- "model.layers.18.mlp.down_proj.biases": "model.safetensors",
274
- "model.layers.18.mlp.down_proj.scales": "model.safetensors",
275
- "model.layers.18.mlp.down_proj.weight": "model.safetensors",
276
- "model.layers.18.mlp.gate_proj.biases": "model.safetensors",
277
- "model.layers.18.mlp.gate_proj.scales": "model.safetensors",
278
- "model.layers.18.mlp.gate_proj.weight": "model.safetensors",
279
- "model.layers.18.mlp.up_proj.biases": "model.safetensors",
280
- "model.layers.18.mlp.up_proj.scales": "model.safetensors",
281
- "model.layers.18.mlp.up_proj.weight": "model.safetensors",
282
- "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
283
- "model.layers.18.self_attn.k_proj.bias": "model.safetensors",
284
- "model.layers.18.self_attn.k_proj.biases": "model.safetensors",
285
- "model.layers.18.self_attn.k_proj.scales": "model.safetensors",
286
- "model.layers.18.self_attn.k_proj.weight": "model.safetensors",
287
- "model.layers.18.self_attn.o_proj.biases": "model.safetensors",
288
- "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
289
- "model.layers.18.self_attn.o_proj.weight": "model.safetensors",
290
- "model.layers.18.self_attn.q_proj.bias": "model.safetensors",
291
- "model.layers.18.self_attn.q_proj.biases": "model.safetensors",
292
- "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
293
- "model.layers.18.self_attn.q_proj.weight": "model.safetensors",
294
- "model.layers.18.self_attn.v_proj.bias": "model.safetensors",
295
- "model.layers.18.self_attn.v_proj.biases": "model.safetensors",
296
- "model.layers.18.self_attn.v_proj.scales": "model.safetensors",
297
- "model.layers.18.self_attn.v_proj.weight": "model.safetensors",
298
- "model.layers.19.input_layernorm.weight": "model.safetensors",
299
- "model.layers.19.mlp.down_proj.biases": "model.safetensors",
300
- "model.layers.19.mlp.down_proj.scales": "model.safetensors",
301
- "model.layers.19.mlp.down_proj.weight": "model.safetensors",
302
- "model.layers.19.mlp.gate_proj.biases": "model.safetensors",
303
- "model.layers.19.mlp.gate_proj.scales": "model.safetensors",
304
- "model.layers.19.mlp.gate_proj.weight": "model.safetensors",
305
- "model.layers.19.mlp.up_proj.biases": "model.safetensors",
306
- "model.layers.19.mlp.up_proj.scales": "model.safetensors",
307
- "model.layers.19.mlp.up_proj.weight": "model.safetensors",
308
- "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
309
- "model.layers.19.self_attn.k_proj.bias": "model.safetensors",
310
- "model.layers.19.self_attn.k_proj.biases": "model.safetensors",
311
- "model.layers.19.self_attn.k_proj.scales": "model.safetensors",
312
- "model.layers.19.self_attn.k_proj.weight": "model.safetensors",
313
- "model.layers.19.self_attn.o_proj.biases": "model.safetensors",
314
- "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
315
- "model.layers.19.self_attn.o_proj.weight": "model.safetensors",
316
- "model.layers.19.self_attn.q_proj.bias": "model.safetensors",
317
- "model.layers.19.self_attn.q_proj.biases": "model.safetensors",
318
- "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
319
- "model.layers.19.self_attn.q_proj.weight": "model.safetensors",
320
- "model.layers.19.self_attn.v_proj.bias": "model.safetensors",
321
- "model.layers.19.self_attn.v_proj.biases": "model.safetensors",
322
- "model.layers.19.self_attn.v_proj.scales": "model.safetensors",
323
- "model.layers.19.self_attn.v_proj.weight": "model.safetensors",
324
- "model.layers.2.input_layernorm.weight": "model.safetensors",
325
- "model.layers.2.mlp.down_proj.biases": "model.safetensors",
326
- "model.layers.2.mlp.down_proj.scales": "model.safetensors",
327
- "model.layers.2.mlp.down_proj.weight": "model.safetensors",
328
- "model.layers.2.mlp.gate_proj.biases": "model.safetensors",
329
- "model.layers.2.mlp.gate_proj.scales": "model.safetensors",
330
- "model.layers.2.mlp.gate_proj.weight": "model.safetensors",
331
- "model.layers.2.mlp.up_proj.biases": "model.safetensors",
332
- "model.layers.2.mlp.up_proj.scales": "model.safetensors",
333
- "model.layers.2.mlp.up_proj.weight": "model.safetensors",
334
- "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
335
- "model.layers.2.self_attn.k_proj.bias": "model.safetensors",
336
- "model.layers.2.self_attn.k_proj.biases": "model.safetensors",
337
- "model.layers.2.self_attn.k_proj.scales": "model.safetensors",
338
- "model.layers.2.self_attn.k_proj.weight": "model.safetensors",
339
- "model.layers.2.self_attn.o_proj.biases": "model.safetensors",
340
- "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
341
- "model.layers.2.self_attn.o_proj.weight": "model.safetensors",
342
- "model.layers.2.self_attn.q_proj.bias": "model.safetensors",
343
- "model.layers.2.self_attn.q_proj.biases": "model.safetensors",
344
- "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
345
- "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
346
- "model.layers.2.self_attn.v_proj.bias": "model.safetensors",
347
- "model.layers.2.self_attn.v_proj.biases": "model.safetensors",
348
- "model.layers.2.self_attn.v_proj.scales": "model.safetensors",
349
- "model.layers.2.self_attn.v_proj.weight": "model.safetensors",
350
- "model.layers.20.input_layernorm.weight": "model.safetensors",
351
- "model.layers.20.mlp.down_proj.biases": "model.safetensors",
352
- "model.layers.20.mlp.down_proj.scales": "model.safetensors",
353
- "model.layers.20.mlp.down_proj.weight": "model.safetensors",
354
- "model.layers.20.mlp.gate_proj.biases": "model.safetensors",
355
- "model.layers.20.mlp.gate_proj.scales": "model.safetensors",
356
- "model.layers.20.mlp.gate_proj.weight": "model.safetensors",
357
- "model.layers.20.mlp.up_proj.biases": "model.safetensors",
358
- "model.layers.20.mlp.up_proj.scales": "model.safetensors",
359
- "model.layers.20.mlp.up_proj.weight": "model.safetensors",
360
- "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
361
- "model.layers.20.self_attn.k_proj.bias": "model.safetensors",
362
- "model.layers.20.self_attn.k_proj.biases": "model.safetensors",
363
- "model.layers.20.self_attn.k_proj.scales": "model.safetensors",
364
- "model.layers.20.self_attn.k_proj.weight": "model.safetensors",
365
- "model.layers.20.self_attn.o_proj.biases": "model.safetensors",
366
- "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
367
- "model.layers.20.self_attn.o_proj.weight": "model.safetensors",
368
- "model.layers.20.self_attn.q_proj.bias": "model.safetensors",
369
- "model.layers.20.self_attn.q_proj.biases": "model.safetensors",
370
- "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
371
- "model.layers.20.self_attn.q_proj.weight": "model.safetensors",
372
- "model.layers.20.self_attn.v_proj.bias": "model.safetensors",
373
- "model.layers.20.self_attn.v_proj.biases": "model.safetensors",
374
- "model.layers.20.self_attn.v_proj.scales": "model.safetensors",
375
- "model.layers.20.self_attn.v_proj.weight": "model.safetensors",
376
- "model.layers.21.input_layernorm.weight": "model.safetensors",
377
- "model.layers.21.mlp.down_proj.biases": "model.safetensors",
378
- "model.layers.21.mlp.down_proj.scales": "model.safetensors",
379
- "model.layers.21.mlp.down_proj.weight": "model.safetensors",
380
- "model.layers.21.mlp.gate_proj.biases": "model.safetensors",
381
- "model.layers.21.mlp.gate_proj.scales": "model.safetensors",
382
- "model.layers.21.mlp.gate_proj.weight": "model.safetensors",
383
- "model.layers.21.mlp.up_proj.biases": "model.safetensors",
384
- "model.layers.21.mlp.up_proj.scales": "model.safetensors",
385
- "model.layers.21.mlp.up_proj.weight": "model.safetensors",
386
- "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
387
- "model.layers.21.self_attn.k_proj.bias": "model.safetensors",
388
- "model.layers.21.self_attn.k_proj.biases": "model.safetensors",
389
- "model.layers.21.self_attn.k_proj.scales": "model.safetensors",
390
- "model.layers.21.self_attn.k_proj.weight": "model.safetensors",
391
- "model.layers.21.self_attn.o_proj.biases": "model.safetensors",
392
- "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
393
- "model.layers.21.self_attn.o_proj.weight": "model.safetensors",
394
- "model.layers.21.self_attn.q_proj.bias": "model.safetensors",
395
- "model.layers.21.self_attn.q_proj.biases": "model.safetensors",
396
- "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
397
- "model.layers.21.self_attn.q_proj.weight": "model.safetensors",
398
- "model.layers.21.self_attn.v_proj.bias": "model.safetensors",
399
- "model.layers.21.self_attn.v_proj.biases": "model.safetensors",
400
- "model.layers.21.self_attn.v_proj.scales": "model.safetensors",
401
- "model.layers.21.self_attn.v_proj.weight": "model.safetensors",
402
- "model.layers.22.input_layernorm.weight": "model.safetensors",
403
- "model.layers.22.mlp.down_proj.biases": "model.safetensors",
404
- "model.layers.22.mlp.down_proj.scales": "model.safetensors",
405
- "model.layers.22.mlp.down_proj.weight": "model.safetensors",
406
- "model.layers.22.mlp.gate_proj.biases": "model.safetensors",
407
- "model.layers.22.mlp.gate_proj.scales": "model.safetensors",
408
- "model.layers.22.mlp.gate_proj.weight": "model.safetensors",
409
- "model.layers.22.mlp.up_proj.biases": "model.safetensors",
410
- "model.layers.22.mlp.up_proj.scales": "model.safetensors",
411
- "model.layers.22.mlp.up_proj.weight": "model.safetensors",
412
- "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
413
- "model.layers.22.self_attn.k_proj.bias": "model.safetensors",
414
- "model.layers.22.self_attn.k_proj.biases": "model.safetensors",
415
- "model.layers.22.self_attn.k_proj.scales": "model.safetensors",
416
- "model.layers.22.self_attn.k_proj.weight": "model.safetensors",
417
- "model.layers.22.self_attn.o_proj.biases": "model.safetensors",
418
- "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
419
- "model.layers.22.self_attn.o_proj.weight": "model.safetensors",
420
- "model.layers.22.self_attn.q_proj.bias": "model.safetensors",
421
- "model.layers.22.self_attn.q_proj.biases": "model.safetensors",
422
- "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
423
- "model.layers.22.self_attn.q_proj.weight": "model.safetensors",
424
- "model.layers.22.self_attn.v_proj.bias": "model.safetensors",
425
- "model.layers.22.self_attn.v_proj.biases": "model.safetensors",
426
- "model.layers.22.self_attn.v_proj.scales": "model.safetensors",
427
- "model.layers.22.self_attn.v_proj.weight": "model.safetensors",
428
- "model.layers.23.input_layernorm.weight": "model.safetensors",
429
- "model.layers.23.mlp.down_proj.biases": "model.safetensors",
430
- "model.layers.23.mlp.down_proj.scales": "model.safetensors",
431
- "model.layers.23.mlp.down_proj.weight": "model.safetensors",
432
- "model.layers.23.mlp.gate_proj.biases": "model.safetensors",
433
- "model.layers.23.mlp.gate_proj.scales": "model.safetensors",
434
- "model.layers.23.mlp.gate_proj.weight": "model.safetensors",
435
- "model.layers.23.mlp.up_proj.biases": "model.safetensors",
436
- "model.layers.23.mlp.up_proj.scales": "model.safetensors",
437
- "model.layers.23.mlp.up_proj.weight": "model.safetensors",
438
- "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
439
- "model.layers.23.self_attn.k_proj.bias": "model.safetensors",
440
- "model.layers.23.self_attn.k_proj.biases": "model.safetensors",
441
- "model.layers.23.self_attn.k_proj.scales": "model.safetensors",
442
- "model.layers.23.self_attn.k_proj.weight": "model.safetensors",
443
- "model.layers.23.self_attn.o_proj.biases": "model.safetensors",
444
- "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
445
- "model.layers.23.self_attn.o_proj.weight": "model.safetensors",
446
- "model.layers.23.self_attn.q_proj.bias": "model.safetensors",
447
- "model.layers.23.self_attn.q_proj.biases": "model.safetensors",
448
- "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
449
- "model.layers.23.self_attn.q_proj.weight": "model.safetensors",
450
- "model.layers.23.self_attn.v_proj.bias": "model.safetensors",
451
- "model.layers.23.self_attn.v_proj.biases": "model.safetensors",
452
- "model.layers.23.self_attn.v_proj.scales": "model.safetensors",
453
- "model.layers.23.self_attn.v_proj.weight": "model.safetensors",
454
- "model.layers.24.input_layernorm.weight": "model.safetensors",
455
- "model.layers.24.mlp.down_proj.biases": "model.safetensors",
456
- "model.layers.24.mlp.down_proj.scales": "model.safetensors",
457
- "model.layers.24.mlp.down_proj.weight": "model.safetensors",
458
- "model.layers.24.mlp.gate_proj.biases": "model.safetensors",
459
- "model.layers.24.mlp.gate_proj.scales": "model.safetensors",
460
- "model.layers.24.mlp.gate_proj.weight": "model.safetensors",
461
- "model.layers.24.mlp.up_proj.biases": "model.safetensors",
462
- "model.layers.24.mlp.up_proj.scales": "model.safetensors",
463
- "model.layers.24.mlp.up_proj.weight": "model.safetensors",
464
- "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
465
- "model.layers.24.self_attn.k_proj.bias": "model.safetensors",
466
- "model.layers.24.self_attn.k_proj.biases": "model.safetensors",
467
- "model.layers.24.self_attn.k_proj.scales": "model.safetensors",
468
- "model.layers.24.self_attn.k_proj.weight": "model.safetensors",
469
- "model.layers.24.self_attn.o_proj.biases": "model.safetensors",
470
- "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
471
- "model.layers.24.self_attn.o_proj.weight": "model.safetensors",
472
- "model.layers.24.self_attn.q_proj.bias": "model.safetensors",
473
- "model.layers.24.self_attn.q_proj.biases": "model.safetensors",
474
- "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
475
- "model.layers.24.self_attn.q_proj.weight": "model.safetensors",
476
- "model.layers.24.self_attn.v_proj.bias": "model.safetensors",
477
- "model.layers.24.self_attn.v_proj.biases": "model.safetensors",
478
- "model.layers.24.self_attn.v_proj.scales": "model.safetensors",
479
- "model.layers.24.self_attn.v_proj.weight": "model.safetensors",
480
- "model.layers.25.input_layernorm.weight": "model.safetensors",
481
- "model.layers.25.mlp.down_proj.biases": "model.safetensors",
482
- "model.layers.25.mlp.down_proj.scales": "model.safetensors",
483
- "model.layers.25.mlp.down_proj.weight": "model.safetensors",
484
- "model.layers.25.mlp.gate_proj.biases": "model.safetensors",
485
- "model.layers.25.mlp.gate_proj.scales": "model.safetensors",
486
- "model.layers.25.mlp.gate_proj.weight": "model.safetensors",
487
- "model.layers.25.mlp.up_proj.biases": "model.safetensors",
488
- "model.layers.25.mlp.up_proj.scales": "model.safetensors",
489
- "model.layers.25.mlp.up_proj.weight": "model.safetensors",
490
- "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
491
- "model.layers.25.self_attn.k_proj.bias": "model.safetensors",
492
- "model.layers.25.self_attn.k_proj.biases": "model.safetensors",
493
- "model.layers.25.self_attn.k_proj.scales": "model.safetensors",
494
- "model.layers.25.self_attn.k_proj.weight": "model.safetensors",
495
- "model.layers.25.self_attn.o_proj.biases": "model.safetensors",
496
- "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
497
- "model.layers.25.self_attn.o_proj.weight": "model.safetensors",
498
- "model.layers.25.self_attn.q_proj.bias": "model.safetensors",
499
- "model.layers.25.self_attn.q_proj.biases": "model.safetensors",
500
- "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
501
- "model.layers.25.self_attn.q_proj.weight": "model.safetensors",
502
- "model.layers.25.self_attn.v_proj.bias": "model.safetensors",
503
- "model.layers.25.self_attn.v_proj.biases": "model.safetensors",
504
- "model.layers.25.self_attn.v_proj.scales": "model.safetensors",
505
- "model.layers.25.self_attn.v_proj.weight": "model.safetensors",
506
- "model.layers.26.input_layernorm.weight": "model.safetensors",
507
- "model.layers.26.mlp.down_proj.biases": "model.safetensors",
508
- "model.layers.26.mlp.down_proj.scales": "model.safetensors",
509
- "model.layers.26.mlp.down_proj.weight": "model.safetensors",
510
- "model.layers.26.mlp.gate_proj.biases": "model.safetensors",
511
- "model.layers.26.mlp.gate_proj.scales": "model.safetensors",
512
- "model.layers.26.mlp.gate_proj.weight": "model.safetensors",
513
- "model.layers.26.mlp.up_proj.biases": "model.safetensors",
514
- "model.layers.26.mlp.up_proj.scales": "model.safetensors",
515
- "model.layers.26.mlp.up_proj.weight": "model.safetensors",
516
- "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
517
- "model.layers.26.self_attn.k_proj.bias": "model.safetensors",
518
- "model.layers.26.self_attn.k_proj.biases": "model.safetensors",
519
- "model.layers.26.self_attn.k_proj.scales": "model.safetensors",
520
- "model.layers.26.self_attn.k_proj.weight": "model.safetensors",
521
- "model.layers.26.self_attn.o_proj.biases": "model.safetensors",
522
- "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
523
- "model.layers.26.self_attn.o_proj.weight": "model.safetensors",
524
- "model.layers.26.self_attn.q_proj.bias": "model.safetensors",
525
- "model.layers.26.self_attn.q_proj.biases": "model.safetensors",
526
- "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
527
- "model.layers.26.self_attn.q_proj.weight": "model.safetensors",
528
- "model.layers.26.self_attn.v_proj.bias": "model.safetensors",
529
- "model.layers.26.self_attn.v_proj.biases": "model.safetensors",
530
- "model.layers.26.self_attn.v_proj.scales": "model.safetensors",
531
- "model.layers.26.self_attn.v_proj.weight": "model.safetensors",
532
- "model.layers.27.input_layernorm.weight": "model.safetensors",
533
- "model.layers.27.mlp.down_proj.biases": "model.safetensors",
534
- "model.layers.27.mlp.down_proj.scales": "model.safetensors",
535
- "model.layers.27.mlp.down_proj.weight": "model.safetensors",
536
- "model.layers.27.mlp.gate_proj.biases": "model.safetensors",
537
- "model.layers.27.mlp.gate_proj.scales": "model.safetensors",
538
- "model.layers.27.mlp.gate_proj.weight": "model.safetensors",
539
- "model.layers.27.mlp.up_proj.biases": "model.safetensors",
540
- "model.layers.27.mlp.up_proj.scales": "model.safetensors",
541
- "model.layers.27.mlp.up_proj.weight": "model.safetensors",
542
- "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
543
- "model.layers.27.self_attn.k_proj.bias": "model.safetensors",
544
- "model.layers.27.self_attn.k_proj.biases": "model.safetensors",
545
- "model.layers.27.self_attn.k_proj.scales": "model.safetensors",
546
- "model.layers.27.self_attn.k_proj.weight": "model.safetensors",
547
- "model.layers.27.self_attn.o_proj.biases": "model.safetensors",
548
- "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
549
- "model.layers.27.self_attn.o_proj.weight": "model.safetensors",
550
- "model.layers.27.self_attn.q_proj.bias": "model.safetensors",
551
- "model.layers.27.self_attn.q_proj.biases": "model.safetensors",
552
- "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
553
- "model.layers.27.self_attn.q_proj.weight": "model.safetensors",
554
- "model.layers.27.self_attn.v_proj.bias": "model.safetensors",
555
- "model.layers.27.self_attn.v_proj.biases": "model.safetensors",
556
- "model.layers.27.self_attn.v_proj.scales": "model.safetensors",
557
- "model.layers.27.self_attn.v_proj.weight": "model.safetensors",
558
- "model.layers.3.input_layernorm.weight": "model.safetensors",
559
- "model.layers.3.mlp.down_proj.biases": "model.safetensors",
560
- "model.layers.3.mlp.down_proj.scales": "model.safetensors",
561
- "model.layers.3.mlp.down_proj.weight": "model.safetensors",
562
- "model.layers.3.mlp.gate_proj.biases": "model.safetensors",
563
- "model.layers.3.mlp.gate_proj.scales": "model.safetensors",
564
- "model.layers.3.mlp.gate_proj.weight": "model.safetensors",
565
- "model.layers.3.mlp.up_proj.biases": "model.safetensors",
566
- "model.layers.3.mlp.up_proj.scales": "model.safetensors",
567
- "model.layers.3.mlp.up_proj.weight": "model.safetensors",
568
- "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
569
- "model.layers.3.self_attn.k_proj.bias": "model.safetensors",
570
- "model.layers.3.self_attn.k_proj.biases": "model.safetensors",
571
- "model.layers.3.self_attn.k_proj.scales": "model.safetensors",
572
- "model.layers.3.self_attn.k_proj.weight": "model.safetensors",
573
- "model.layers.3.self_attn.o_proj.biases": "model.safetensors",
574
- "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
575
- "model.layers.3.self_attn.o_proj.weight": "model.safetensors",
576
- "model.layers.3.self_attn.q_proj.bias": "model.safetensors",
577
- "model.layers.3.self_attn.q_proj.biases": "model.safetensors",
578
- "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
579
- "model.layers.3.self_attn.q_proj.weight": "model.safetensors",
580
- "model.layers.3.self_attn.v_proj.bias": "model.safetensors",
581
- "model.layers.3.self_attn.v_proj.biases": "model.safetensors",
582
- "model.layers.3.self_attn.v_proj.scales": "model.safetensors",
583
- "model.layers.3.self_attn.v_proj.weight": "model.safetensors",
584
- "model.layers.4.input_layernorm.weight": "model.safetensors",
585
- "model.layers.4.mlp.down_proj.biases": "model.safetensors",
586
- "model.layers.4.mlp.down_proj.scales": "model.safetensors",
587
- "model.layers.4.mlp.down_proj.weight": "model.safetensors",
588
- "model.layers.4.mlp.gate_proj.biases": "model.safetensors",
589
- "model.layers.4.mlp.gate_proj.scales": "model.safetensors",
590
- "model.layers.4.mlp.gate_proj.weight": "model.safetensors",
591
- "model.layers.4.mlp.up_proj.biases": "model.safetensors",
592
- "model.layers.4.mlp.up_proj.scales": "model.safetensors",
593
- "model.layers.4.mlp.up_proj.weight": "model.safetensors",
594
- "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
595
- "model.layers.4.self_attn.k_proj.bias": "model.safetensors",
596
- "model.layers.4.self_attn.k_proj.biases": "model.safetensors",
597
- "model.layers.4.self_attn.k_proj.scales": "model.safetensors",
598
- "model.layers.4.self_attn.k_proj.weight": "model.safetensors",
599
- "model.layers.4.self_attn.o_proj.biases": "model.safetensors",
600
- "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
601
- "model.layers.4.self_attn.o_proj.weight": "model.safetensors",
602
- "model.layers.4.self_attn.q_proj.bias": "model.safetensors",
603
- "model.layers.4.self_attn.q_proj.biases": "model.safetensors",
604
- "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
605
- "model.layers.4.self_attn.q_proj.weight": "model.safetensors",
606
- "model.layers.4.self_attn.v_proj.bias": "model.safetensors",
607
- "model.layers.4.self_attn.v_proj.biases": "model.safetensors",
608
- "model.layers.4.self_attn.v_proj.scales": "model.safetensors",
609
- "model.layers.4.self_attn.v_proj.weight": "model.safetensors",
610
- "model.layers.5.input_layernorm.weight": "model.safetensors",
611
- "model.layers.5.mlp.down_proj.biases": "model.safetensors",
612
- "model.layers.5.mlp.down_proj.scales": "model.safetensors",
613
- "model.layers.5.mlp.down_proj.weight": "model.safetensors",
614
- "model.layers.5.mlp.gate_proj.biases": "model.safetensors",
615
- "model.layers.5.mlp.gate_proj.scales": "model.safetensors",
616
- "model.layers.5.mlp.gate_proj.weight": "model.safetensors",
617
- "model.layers.5.mlp.up_proj.biases": "model.safetensors",
618
- "model.layers.5.mlp.up_proj.scales": "model.safetensors",
619
- "model.layers.5.mlp.up_proj.weight": "model.safetensors",
620
- "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
621
- "model.layers.5.self_attn.k_proj.bias": "model.safetensors",
622
- "model.layers.5.self_attn.k_proj.biases": "model.safetensors",
623
- "model.layers.5.self_attn.k_proj.scales": "model.safetensors",
624
- "model.layers.5.self_attn.k_proj.weight": "model.safetensors",
625
- "model.layers.5.self_attn.o_proj.biases": "model.safetensors",
626
- "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
627
- "model.layers.5.self_attn.o_proj.weight": "model.safetensors",
628
- "model.layers.5.self_attn.q_proj.bias": "model.safetensors",
629
- "model.layers.5.self_attn.q_proj.biases": "model.safetensors",
630
- "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
631
- "model.layers.5.self_attn.q_proj.weight": "model.safetensors",
632
- "model.layers.5.self_attn.v_proj.bias": "model.safetensors",
633
- "model.layers.5.self_attn.v_proj.biases": "model.safetensors",
634
- "model.layers.5.self_attn.v_proj.scales": "model.safetensors",
635
- "model.layers.5.self_attn.v_proj.weight": "model.safetensors",
636
- "model.layers.6.input_layernorm.weight": "model.safetensors",
637
- "model.layers.6.mlp.down_proj.biases": "model.safetensors",
638
- "model.layers.6.mlp.down_proj.scales": "model.safetensors",
639
- "model.layers.6.mlp.down_proj.weight": "model.safetensors",
640
- "model.layers.6.mlp.gate_proj.biases": "model.safetensors",
641
- "model.layers.6.mlp.gate_proj.scales": "model.safetensors",
642
- "model.layers.6.mlp.gate_proj.weight": "model.safetensors",
643
- "model.layers.6.mlp.up_proj.biases": "model.safetensors",
644
- "model.layers.6.mlp.up_proj.scales": "model.safetensors",
645
- "model.layers.6.mlp.up_proj.weight": "model.safetensors",
646
- "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
647
- "model.layers.6.self_attn.k_proj.bias": "model.safetensors",
648
- "model.layers.6.self_attn.k_proj.biases": "model.safetensors",
649
- "model.layers.6.self_attn.k_proj.scales": "model.safetensors",
650
- "model.layers.6.self_attn.k_proj.weight": "model.safetensors",
651
- "model.layers.6.self_attn.o_proj.biases": "model.safetensors",
652
- "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
653
- "model.layers.6.self_attn.o_proj.weight": "model.safetensors",
654
- "model.layers.6.self_attn.q_proj.bias": "model.safetensors",
655
- "model.layers.6.self_attn.q_proj.biases": "model.safetensors",
656
- "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
657
- "model.layers.6.self_attn.q_proj.weight": "model.safetensors",
658
- "model.layers.6.self_attn.v_proj.bias": "model.safetensors",
659
- "model.layers.6.self_attn.v_proj.biases": "model.safetensors",
660
- "model.layers.6.self_attn.v_proj.scales": "model.safetensors",
661
- "model.layers.6.self_attn.v_proj.weight": "model.safetensors",
662
- "model.layers.7.input_layernorm.weight": "model.safetensors",
663
- "model.layers.7.mlp.down_proj.biases": "model.safetensors",
664
- "model.layers.7.mlp.down_proj.scales": "model.safetensors",
665
- "model.layers.7.mlp.down_proj.weight": "model.safetensors",
666
- "model.layers.7.mlp.gate_proj.biases": "model.safetensors",
667
- "model.layers.7.mlp.gate_proj.scales": "model.safetensors",
668
- "model.layers.7.mlp.gate_proj.weight": "model.safetensors",
669
- "model.layers.7.mlp.up_proj.biases": "model.safetensors",
670
- "model.layers.7.mlp.up_proj.scales": "model.safetensors",
671
- "model.layers.7.mlp.up_proj.weight": "model.safetensors",
672
- "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
673
- "model.layers.7.self_attn.k_proj.bias": "model.safetensors",
674
- "model.layers.7.self_attn.k_proj.biases": "model.safetensors",
675
- "model.layers.7.self_attn.k_proj.scales": "model.safetensors",
676
- "model.layers.7.self_attn.k_proj.weight": "model.safetensors",
677
- "model.layers.7.self_attn.o_proj.biases": "model.safetensors",
678
- "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
679
- "model.layers.7.self_attn.o_proj.weight": "model.safetensors",
680
- "model.layers.7.self_attn.q_proj.bias": "model.safetensors",
681
- "model.layers.7.self_attn.q_proj.biases": "model.safetensors",
682
- "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
683
- "model.layers.7.self_attn.q_proj.weight": "model.safetensors",
684
- "model.layers.7.self_attn.v_proj.bias": "model.safetensors",
685
- "model.layers.7.self_attn.v_proj.biases": "model.safetensors",
686
- "model.layers.7.self_attn.v_proj.scales": "model.safetensors",
687
- "model.layers.7.self_attn.v_proj.weight": "model.safetensors",
688
- "model.layers.8.input_layernorm.weight": "model.safetensors",
689
- "model.layers.8.mlp.down_proj.biases": "model.safetensors",
690
- "model.layers.8.mlp.down_proj.scales": "model.safetensors",
691
- "model.layers.8.mlp.down_proj.weight": "model.safetensors",
692
- "model.layers.8.mlp.gate_proj.biases": "model.safetensors",
693
- "model.layers.8.mlp.gate_proj.scales": "model.safetensors",
694
- "model.layers.8.mlp.gate_proj.weight": "model.safetensors",
695
- "model.layers.8.mlp.up_proj.biases": "model.safetensors",
696
- "model.layers.8.mlp.up_proj.scales": "model.safetensors",
697
- "model.layers.8.mlp.up_proj.weight": "model.safetensors",
698
- "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
699
- "model.layers.8.self_attn.k_proj.bias": "model.safetensors",
700
- "model.layers.8.self_attn.k_proj.biases": "model.safetensors",
701
- "model.layers.8.self_attn.k_proj.scales": "model.safetensors",
702
- "model.layers.8.self_attn.k_proj.weight": "model.safetensors",
703
- "model.layers.8.self_attn.o_proj.biases": "model.safetensors",
704
- "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
705
- "model.layers.8.self_attn.o_proj.weight": "model.safetensors",
706
- "model.layers.8.self_attn.q_proj.bias": "model.safetensors",
707
- "model.layers.8.self_attn.q_proj.biases": "model.safetensors",
708
- "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
709
- "model.layers.8.self_attn.q_proj.weight": "model.safetensors",
710
- "model.layers.8.self_attn.v_proj.bias": "model.safetensors",
711
- "model.layers.8.self_attn.v_proj.biases": "model.safetensors",
712
- "model.layers.8.self_attn.v_proj.scales": "model.safetensors",
713
- "model.layers.8.self_attn.v_proj.weight": "model.safetensors",
714
- "model.layers.9.input_layernorm.weight": "model.safetensors",
715
- "model.layers.9.mlp.down_proj.biases": "model.safetensors",
716
- "model.layers.9.mlp.down_proj.scales": "model.safetensors",
717
- "model.layers.9.mlp.down_proj.weight": "model.safetensors",
718
- "model.layers.9.mlp.gate_proj.biases": "model.safetensors",
719
- "model.layers.9.mlp.gate_proj.scales": "model.safetensors",
720
- "model.layers.9.mlp.gate_proj.weight": "model.safetensors",
721
- "model.layers.9.mlp.up_proj.biases": "model.safetensors",
722
- "model.layers.9.mlp.up_proj.scales": "model.safetensors",
723
- "model.layers.9.mlp.up_proj.weight": "model.safetensors",
724
- "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
725
- "model.layers.9.self_attn.k_proj.bias": "model.safetensors",
726
- "model.layers.9.self_attn.k_proj.biases": "model.safetensors",
727
- "model.layers.9.self_attn.k_proj.scales": "model.safetensors",
728
- "model.layers.9.self_attn.k_proj.weight": "model.safetensors",
729
- "model.layers.9.self_attn.o_proj.biases": "model.safetensors",
730
- "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
731
- "model.layers.9.self_attn.o_proj.weight": "model.safetensors",
732
- "model.layers.9.self_attn.q_proj.bias": "model.safetensors",
733
- "model.layers.9.self_attn.q_proj.biases": "model.safetensors",
734
- "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
735
- "model.layers.9.self_attn.q_proj.weight": "model.safetensors",
736
- "model.layers.9.self_attn.v_proj.bias": "model.safetensors",
737
- "model.layers.9.self_attn.v_proj.biases": "model.safetensors",
738
- "model.layers.9.self_attn.v_proj.scales": "model.safetensors",
739
- "model.layers.9.self_attn.v_proj.weight": "model.safetensors",
740
- "model.norm.weight": "model.safetensors"
741
- }
742
- }
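Note: every entry in the deleted `weight_map` above resolves to the single shard `model.safetensors`, and each quantized projection appears as a `weight`/`scales`/`biases` triplet, which is how MLX stores group-quantized tensors alongside the original layer biases. A minimal sketch for sanity-checking such an index locally (the folder path is an assumption, not something this PR prescribes):

```python
import json
from collections import Counter

# Hypothetical local path to one of the quant folders; adjust as needed.
index_path = "DeepSeek-R1-Distill-Qwen-7B-3bit/model.safetensors.index.json"

with open(index_path) as f:
    index = json.load(f)

# Count tensors per shard and per trailing suffix (weight / scales / biases / bias).
shards = Counter(index["weight_map"].values())
suffixes = Counter(name.rsplit(".", 1)[-1] for name in index["weight_map"])

print("total size (bytes):", index["metadata"]["total_size"])
print("tensors per shard:", dict(shards))
print("tensor kinds:", dict(suffixes))
```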
DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
- {
- "bos_token": {
- "content": "<|begin▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "eos_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "pad_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- }
- }
DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/tokenizer.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
- size 11422778
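What was deleted here is only the Git LFS pointer (spec version, SHA-256 oid, byte size), not the ~11 MB `tokenizer.json` blob itself. If you need the real file rather than the pointer, fetching through the Hub API is simpler than resolving LFS by hand; a hedged sketch, with the repo id as a placeholder assumption:

```python
from huggingface_hub import hf_hub_download

# Assumed repo id; substitute the actual repository this PR targets.
path = hf_hub_download(
    repo_id="someuser/DeepSeek-R1-Distill-Qwen-7B-MLX",
    filename="DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/tokenizer.json",
)
print(path)  # local cached path to the resolved tokenizer.json
```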
DeepSeek-R1-Distill-Qwen-7B-3,6_mixed/tokenizer_config.json DELETED
@@ -1,195 +0,0 @@
- {
- "add_bos_token": true,
- "add_eos_token": false,
- "add_prefix_space": null,
- "added_tokens_decoder": {
- "151643": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151644": {
- "content": "<|User|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151645": {
- "content": "<|Assistant|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151646": {
- "content": "<|begin▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151647": {
- "content": "<|EOT|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151648": {
- "content": "<think>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151649": {
- "content": "</think>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151650": {
- "content": "<|quad_start|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151651": {
- "content": "<|quad_end|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151652": {
- "content": "<|vision_start|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151653": {
- "content": "<|vision_end|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151654": {
- "content": "<|vision_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151655": {
- "content": "<|image_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151656": {
- "content": "<|video_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151657": {
- "content": "<tool_call>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151658": {
- "content": "</tool_call>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151659": {
- "content": "<|fim_prefix|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151660": {
- "content": "<|fim_middle|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151661": {
- "content": "<|fim_suffix|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151662": {
- "content": "<|fim_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151663": {
- "content": "<|repo_name|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151664": {
- "content": "<|file_sep|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- }
- },
- "bos_token": "<|begin▁of▁sentence|>",
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
- "clean_up_tokenization_spaces": false,
- "eos_token": "<|end▁of▁sentence|>",
- "extra_special_tokens": {},
- "legacy": true,
- "model_max_length": 16384,
- "pad_token": "<|end▁of▁sentence|>",
- "sp_model_kwargs": {},
- "tokenizer_class": "LlamaTokenizerFast",
- "unk_token": null,
- "use_default_system_prompt": false
- }
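The `chat_template` in this deleted config wraps turns in `<|User|>`/`<|Assistant|>` markers, strips any `</think>` prefix from stored assistant turns, and, when `add_generation_prompt` is set, seeds the reply with `<|Assistant|><think>\n`. A minimal sketch of rendering it through a `transformers` tokenizer (the local folder path is an assumption):

```python
from transformers import AutoTokenizer

# Assumed local quant folder containing the tokenizer_config.json shown above.
tok = AutoTokenizer.from_pretrained("DeepSeek-R1-Distill-Qwen-7B-3,6_mixed")

messages = [{"role": "user", "content": "What is 7 * 6?"}]
prompt = tok.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,  # per the template, appends "<|Assistant|><think>\n"
)
print(prompt)
```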
DeepSeek-R1-Distill-Qwen-7B-3bit/config.json DELETED
@@ -1,36 +0,0 @@
- {
- "architectures": [
- "Qwen2ForCausalLM"
- ],
- "attention_dropout": 0.0,
- "bos_token_id": 151643,
- "eos_token_id": 151643,
- "hidden_act": "silu",
- "hidden_size": 3584,
- "initializer_range": 0.02,
- "intermediate_size": 18944,
- "max_position_embeddings": 131072,
- "max_window_layers": 28,
- "model_type": "qwen2",
- "num_attention_heads": 28,
- "num_hidden_layers": 28,
- "num_key_value_heads": 4,
- "quantization": {
- "group_size": 64,
- "bits": 3
- },
- "quantization_config": {
- "group_size": 64,
- "bits": 3
- },
- "rms_norm_eps": 1e-06,
- "rope_theta": 10000,
- "sliding_window": 4096,
- "tie_word_embeddings": false,
- "torch_dtype": "bfloat16",
- "transformers_version": "4.44.0",
- "use_cache": true,
- "use_mrope": false,
- "use_sliding_window": false,
- "vocab_size": 152064
- }
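The duplicated `quantization` / `quantization_config` blocks (`group_size` 64, `bits` 3) are what MLX reads at load time to rebuild the 3-bit layers. A sketch of loading this quant with `mlx-lm`, assuming the package is installed and the folder is available locally (a Hub repo id would also work):

```python
from mlx_lm import load, generate

# Assumed local path to the deleted quant folder; mlx_lm picks up the
# "quantization" block from config.json and reconstructs 3-bit layers.
model, tokenizer = load("DeepSeek-R1-Distill-Qwen-7B-3bit")

text = generate(model, tokenizer, prompt="Hello", max_tokens=32)
print(text)
```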
DeepSeek-R1-Distill-Qwen-7B-3bit/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:d23eb188c19c3554501788a8d38d88cf23afec6c5248d10b83a8f3eed1361e9d
- size 3332435653
DeepSeek-R1-Distill-Qwen-7B-3bit/model.safetensors.index.json DELETED
@@ -1,742 +0,0 @@
- {
- "metadata": {
- "total_size": 3332353024
- },
- "weight_map": {
- "lm_head.biases": "model.safetensors",
- "lm_head.scales": "model.safetensors",
- "lm_head.weight": "model.safetensors",
- "model.embed_tokens.biases": "model.safetensors",
- "model.embed_tokens.scales": "model.safetensors",
- "model.embed_tokens.weight": "model.safetensors",
- "model.layers.0.input_layernorm.weight": "model.safetensors",
- "model.layers.0.mlp.down_proj.biases": "model.safetensors",
- "model.layers.0.mlp.down_proj.scales": "model.safetensors",
- "model.layers.0.mlp.down_proj.weight": "model.safetensors",
- "model.layers.0.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.0.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.0.mlp.up_proj.biases": "model.safetensors",
- "model.layers.0.mlp.up_proj.scales": "model.safetensors",
- "model.layers.0.mlp.up_proj.weight": "model.safetensors",
- "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.0.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.1.input_layernorm.weight": "model.safetensors",
- "model.layers.1.mlp.down_proj.biases": "model.safetensors",
- "model.layers.1.mlp.down_proj.scales": "model.safetensors",
- "model.layers.1.mlp.down_proj.weight": "model.safetensors",
- "model.layers.1.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.1.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.1.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.1.mlp.up_proj.biases": "model.safetensors",
- "model.layers.1.mlp.up_proj.scales": "model.safetensors",
- "model.layers.1.mlp.up_proj.weight": "model.safetensors",
- "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.1.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.10.input_layernorm.weight": "model.safetensors",
- "model.layers.10.mlp.down_proj.biases": "model.safetensors",
- "model.layers.10.mlp.down_proj.scales": "model.safetensors",
- "model.layers.10.mlp.down_proj.weight": "model.safetensors",
- "model.layers.10.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.10.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.10.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.10.mlp.up_proj.biases": "model.safetensors",
- "model.layers.10.mlp.up_proj.scales": "model.safetensors",
- "model.layers.10.mlp.up_proj.weight": "model.safetensors",
- "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.10.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.11.input_layernorm.weight": "model.safetensors",
- "model.layers.11.mlp.down_proj.biases": "model.safetensors",
- "model.layers.11.mlp.down_proj.scales": "model.safetensors",
- "model.layers.11.mlp.down_proj.weight": "model.safetensors",
- "model.layers.11.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.11.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.11.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.11.mlp.up_proj.biases": "model.safetensors",
- "model.layers.11.mlp.up_proj.scales": "model.safetensors",
- "model.layers.11.mlp.up_proj.weight": "model.safetensors",
- "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.11.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.12.input_layernorm.weight": "model.safetensors",
- "model.layers.12.mlp.down_proj.biases": "model.safetensors",
- "model.layers.12.mlp.down_proj.scales": "model.safetensors",
- "model.layers.12.mlp.down_proj.weight": "model.safetensors",
- "model.layers.12.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.12.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.12.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.12.mlp.up_proj.biases": "model.safetensors",
- "model.layers.12.mlp.up_proj.scales": "model.safetensors",
- "model.layers.12.mlp.up_proj.weight": "model.safetensors",
- "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.12.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.13.input_layernorm.weight": "model.safetensors",
- "model.layers.13.mlp.down_proj.biases": "model.safetensors",
- "model.layers.13.mlp.down_proj.scales": "model.safetensors",
- "model.layers.13.mlp.down_proj.weight": "model.safetensors",
- "model.layers.13.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.13.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.13.mlp.gate_proj.weight": "model.safetensors",
149
- "model.layers.13.mlp.up_proj.biases": "model.safetensors",
150
- "model.layers.13.mlp.up_proj.scales": "model.safetensors",
151
- "model.layers.13.mlp.up_proj.weight": "model.safetensors",
152
- "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
153
- "model.layers.13.self_attn.k_proj.bias": "model.safetensors",
154
- "model.layers.13.self_attn.k_proj.biases": "model.safetensors",
155
- "model.layers.13.self_attn.k_proj.scales": "model.safetensors",
156
- "model.layers.13.self_attn.k_proj.weight": "model.safetensors",
157
- "model.layers.13.self_attn.o_proj.biases": "model.safetensors",
158
- "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
159
- "model.layers.13.self_attn.o_proj.weight": "model.safetensors",
160
- "model.layers.13.self_attn.q_proj.bias": "model.safetensors",
161
- "model.layers.13.self_attn.q_proj.biases": "model.safetensors",
162
- "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
163
- "model.layers.13.self_attn.q_proj.weight": "model.safetensors",
164
- "model.layers.13.self_attn.v_proj.bias": "model.safetensors",
165
- "model.layers.13.self_attn.v_proj.biases": "model.safetensors",
166
- "model.layers.13.self_attn.v_proj.scales": "model.safetensors",
167
- "model.layers.13.self_attn.v_proj.weight": "model.safetensors",
168
- "model.layers.14.input_layernorm.weight": "model.safetensors",
169
- "model.layers.14.mlp.down_proj.biases": "model.safetensors",
170
- "model.layers.14.mlp.down_proj.scales": "model.safetensors",
171
- "model.layers.14.mlp.down_proj.weight": "model.safetensors",
172
- "model.layers.14.mlp.gate_proj.biases": "model.safetensors",
173
- "model.layers.14.mlp.gate_proj.scales": "model.safetensors",
174
- "model.layers.14.mlp.gate_proj.weight": "model.safetensors",
175
- "model.layers.14.mlp.up_proj.biases": "model.safetensors",
176
- "model.layers.14.mlp.up_proj.scales": "model.safetensors",
177
- "model.layers.14.mlp.up_proj.weight": "model.safetensors",
178
- "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
179
- "model.layers.14.self_attn.k_proj.bias": "model.safetensors",
180
- "model.layers.14.self_attn.k_proj.biases": "model.safetensors",
181
- "model.layers.14.self_attn.k_proj.scales": "model.safetensors",
182
- "model.layers.14.self_attn.k_proj.weight": "model.safetensors",
183
- "model.layers.14.self_attn.o_proj.biases": "model.safetensors",
184
- "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
185
- "model.layers.14.self_attn.o_proj.weight": "model.safetensors",
186
- "model.layers.14.self_attn.q_proj.bias": "model.safetensors",
187
- "model.layers.14.self_attn.q_proj.biases": "model.safetensors",
188
- "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
189
- "model.layers.14.self_attn.q_proj.weight": "model.safetensors",
190
- "model.layers.14.self_attn.v_proj.bias": "model.safetensors",
191
- "model.layers.14.self_attn.v_proj.biases": "model.safetensors",
192
- "model.layers.14.self_attn.v_proj.scales": "model.safetensors",
193
- "model.layers.14.self_attn.v_proj.weight": "model.safetensors",
194
- "model.layers.15.input_layernorm.weight": "model.safetensors",
195
- "model.layers.15.mlp.down_proj.biases": "model.safetensors",
196
- "model.layers.15.mlp.down_proj.scales": "model.safetensors",
197
- "model.layers.15.mlp.down_proj.weight": "model.safetensors",
198
- "model.layers.15.mlp.gate_proj.biases": "model.safetensors",
199
- "model.layers.15.mlp.gate_proj.scales": "model.safetensors",
200
- "model.layers.15.mlp.gate_proj.weight": "model.safetensors",
201
- "model.layers.15.mlp.up_proj.biases": "model.safetensors",
202
- "model.layers.15.mlp.up_proj.scales": "model.safetensors",
203
- "model.layers.15.mlp.up_proj.weight": "model.safetensors",
204
- "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
205
- "model.layers.15.self_attn.k_proj.bias": "model.safetensors",
206
- "model.layers.15.self_attn.k_proj.biases": "model.safetensors",
207
- "model.layers.15.self_attn.k_proj.scales": "model.safetensors",
208
- "model.layers.15.self_attn.k_proj.weight": "model.safetensors",
209
- "model.layers.15.self_attn.o_proj.biases": "model.safetensors",
210
- "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
211
- "model.layers.15.self_attn.o_proj.weight": "model.safetensors",
212
- "model.layers.15.self_attn.q_proj.bias": "model.safetensors",
213
- "model.layers.15.self_attn.q_proj.biases": "model.safetensors",
214
- "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
215
- "model.layers.15.self_attn.q_proj.weight": "model.safetensors",
216
- "model.layers.15.self_attn.v_proj.bias": "model.safetensors",
217
- "model.layers.15.self_attn.v_proj.biases": "model.safetensors",
218
- "model.layers.15.self_attn.v_proj.scales": "model.safetensors",
219
- "model.layers.15.self_attn.v_proj.weight": "model.safetensors",
220
- "model.layers.16.input_layernorm.weight": "model.safetensors",
221
- "model.layers.16.mlp.down_proj.biases": "model.safetensors",
222
- "model.layers.16.mlp.down_proj.scales": "model.safetensors",
223
- "model.layers.16.mlp.down_proj.weight": "model.safetensors",
224
- "model.layers.16.mlp.gate_proj.biases": "model.safetensors",
225
- "model.layers.16.mlp.gate_proj.scales": "model.safetensors",
226
- "model.layers.16.mlp.gate_proj.weight": "model.safetensors",
227
- "model.layers.16.mlp.up_proj.biases": "model.safetensors",
228
- "model.layers.16.mlp.up_proj.scales": "model.safetensors",
229
- "model.layers.16.mlp.up_proj.weight": "model.safetensors",
230
- "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
231
- "model.layers.16.self_attn.k_proj.bias": "model.safetensors",
232
- "model.layers.16.self_attn.k_proj.biases": "model.safetensors",
233
- "model.layers.16.self_attn.k_proj.scales": "model.safetensors",
234
- "model.layers.16.self_attn.k_proj.weight": "model.safetensors",
235
- "model.layers.16.self_attn.o_proj.biases": "model.safetensors",
236
- "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
237
- "model.layers.16.self_attn.o_proj.weight": "model.safetensors",
238
- "model.layers.16.self_attn.q_proj.bias": "model.safetensors",
239
- "model.layers.16.self_attn.q_proj.biases": "model.safetensors",
240
- "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
241
- "model.layers.16.self_attn.q_proj.weight": "model.safetensors",
242
- "model.layers.16.self_attn.v_proj.bias": "model.safetensors",
243
- "model.layers.16.self_attn.v_proj.biases": "model.safetensors",
244
- "model.layers.16.self_attn.v_proj.scales": "model.safetensors",
245
- "model.layers.16.self_attn.v_proj.weight": "model.safetensors",
246
- "model.layers.17.input_layernorm.weight": "model.safetensors",
247
- "model.layers.17.mlp.down_proj.biases": "model.safetensors",
248
- "model.layers.17.mlp.down_proj.scales": "model.safetensors",
249
- "model.layers.17.mlp.down_proj.weight": "model.safetensors",
250
- "model.layers.17.mlp.gate_proj.biases": "model.safetensors",
251
- "model.layers.17.mlp.gate_proj.scales": "model.safetensors",
252
- "model.layers.17.mlp.gate_proj.weight": "model.safetensors",
253
- "model.layers.17.mlp.up_proj.biases": "model.safetensors",
254
- "model.layers.17.mlp.up_proj.scales": "model.safetensors",
255
- "model.layers.17.mlp.up_proj.weight": "model.safetensors",
256
- "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
257
- "model.layers.17.self_attn.k_proj.bias": "model.safetensors",
258
- "model.layers.17.self_attn.k_proj.biases": "model.safetensors",
259
- "model.layers.17.self_attn.k_proj.scales": "model.safetensors",
260
- "model.layers.17.self_attn.k_proj.weight": "model.safetensors",
261
- "model.layers.17.self_attn.o_proj.biases": "model.safetensors",
262
- "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
263
- "model.layers.17.self_attn.o_proj.weight": "model.safetensors",
264
- "model.layers.17.self_attn.q_proj.bias": "model.safetensors",
265
- "model.layers.17.self_attn.q_proj.biases": "model.safetensors",
266
- "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
267
- "model.layers.17.self_attn.q_proj.weight": "model.safetensors",
268
- "model.layers.17.self_attn.v_proj.bias": "model.safetensors",
269
- "model.layers.17.self_attn.v_proj.biases": "model.safetensors",
270
- "model.layers.17.self_attn.v_proj.scales": "model.safetensors",
271
- "model.layers.17.self_attn.v_proj.weight": "model.safetensors",
272
- "model.layers.18.input_layernorm.weight": "model.safetensors",
273
- "model.layers.18.mlp.down_proj.biases": "model.safetensors",
274
- "model.layers.18.mlp.down_proj.scales": "model.safetensors",
275
- "model.layers.18.mlp.down_proj.weight": "model.safetensors",
276
- "model.layers.18.mlp.gate_proj.biases": "model.safetensors",
277
- "model.layers.18.mlp.gate_proj.scales": "model.safetensors",
278
- "model.layers.18.mlp.gate_proj.weight": "model.safetensors",
279
- "model.layers.18.mlp.up_proj.biases": "model.safetensors",
280
- "model.layers.18.mlp.up_proj.scales": "model.safetensors",
281
- "model.layers.18.mlp.up_proj.weight": "model.safetensors",
282
- "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
283
- "model.layers.18.self_attn.k_proj.bias": "model.safetensors",
284
- "model.layers.18.self_attn.k_proj.biases": "model.safetensors",
285
- "model.layers.18.self_attn.k_proj.scales": "model.safetensors",
286
- "model.layers.18.self_attn.k_proj.weight": "model.safetensors",
287
- "model.layers.18.self_attn.o_proj.biases": "model.safetensors",
288
- "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
289
- "model.layers.18.self_attn.o_proj.weight": "model.safetensors",
290
- "model.layers.18.self_attn.q_proj.bias": "model.safetensors",
291
- "model.layers.18.self_attn.q_proj.biases": "model.safetensors",
292
- "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
293
- "model.layers.18.self_attn.q_proj.weight": "model.safetensors",
294
- "model.layers.18.self_attn.v_proj.bias": "model.safetensors",
295
- "model.layers.18.self_attn.v_proj.biases": "model.safetensors",
296
- "model.layers.18.self_attn.v_proj.scales": "model.safetensors",
297
- "model.layers.18.self_attn.v_proj.weight": "model.safetensors",
298
- "model.layers.19.input_layernorm.weight": "model.safetensors",
299
- "model.layers.19.mlp.down_proj.biases": "model.safetensors",
300
- "model.layers.19.mlp.down_proj.scales": "model.safetensors",
301
- "model.layers.19.mlp.down_proj.weight": "model.safetensors",
302
- "model.layers.19.mlp.gate_proj.biases": "model.safetensors",
303
- "model.layers.19.mlp.gate_proj.scales": "model.safetensors",
304
- "model.layers.19.mlp.gate_proj.weight": "model.safetensors",
305
- "model.layers.19.mlp.up_proj.biases": "model.safetensors",
306
- "model.layers.19.mlp.up_proj.scales": "model.safetensors",
307
- "model.layers.19.mlp.up_proj.weight": "model.safetensors",
308
- "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
309
- "model.layers.19.self_attn.k_proj.bias": "model.safetensors",
310
- "model.layers.19.self_attn.k_proj.biases": "model.safetensors",
311
- "model.layers.19.self_attn.k_proj.scales": "model.safetensors",
312
- "model.layers.19.self_attn.k_proj.weight": "model.safetensors",
313
- "model.layers.19.self_attn.o_proj.biases": "model.safetensors",
314
- "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
315
- "model.layers.19.self_attn.o_proj.weight": "model.safetensors",
316
- "model.layers.19.self_attn.q_proj.bias": "model.safetensors",
317
- "model.layers.19.self_attn.q_proj.biases": "model.safetensors",
318
- "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
319
- "model.layers.19.self_attn.q_proj.weight": "model.safetensors",
320
- "model.layers.19.self_attn.v_proj.bias": "model.safetensors",
321
- "model.layers.19.self_attn.v_proj.biases": "model.safetensors",
322
- "model.layers.19.self_attn.v_proj.scales": "model.safetensors",
323
- "model.layers.19.self_attn.v_proj.weight": "model.safetensors",
324
- "model.layers.2.input_layernorm.weight": "model.safetensors",
325
- "model.layers.2.mlp.down_proj.biases": "model.safetensors",
326
- "model.layers.2.mlp.down_proj.scales": "model.safetensors",
327
- "model.layers.2.mlp.down_proj.weight": "model.safetensors",
328
- "model.layers.2.mlp.gate_proj.biases": "model.safetensors",
329
- "model.layers.2.mlp.gate_proj.scales": "model.safetensors",
330
- "model.layers.2.mlp.gate_proj.weight": "model.safetensors",
331
- "model.layers.2.mlp.up_proj.biases": "model.safetensors",
332
- "model.layers.2.mlp.up_proj.scales": "model.safetensors",
333
- "model.layers.2.mlp.up_proj.weight": "model.safetensors",
334
- "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
335
- "model.layers.2.self_attn.k_proj.bias": "model.safetensors",
336
- "model.layers.2.self_attn.k_proj.biases": "model.safetensors",
337
- "model.layers.2.self_attn.k_proj.scales": "model.safetensors",
338
- "model.layers.2.self_attn.k_proj.weight": "model.safetensors",
339
- "model.layers.2.self_attn.o_proj.biases": "model.safetensors",
340
- "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
341
- "model.layers.2.self_attn.o_proj.weight": "model.safetensors",
342
- "model.layers.2.self_attn.q_proj.bias": "model.safetensors",
343
- "model.layers.2.self_attn.q_proj.biases": "model.safetensors",
344
- "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
345
- "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
346
- "model.layers.2.self_attn.v_proj.bias": "model.safetensors",
347
- "model.layers.2.self_attn.v_proj.biases": "model.safetensors",
348
- "model.layers.2.self_attn.v_proj.scales": "model.safetensors",
349
- "model.layers.2.self_attn.v_proj.weight": "model.safetensors",
350
- "model.layers.20.input_layernorm.weight": "model.safetensors",
351
- "model.layers.20.mlp.down_proj.biases": "model.safetensors",
352
- "model.layers.20.mlp.down_proj.scales": "model.safetensors",
353
- "model.layers.20.mlp.down_proj.weight": "model.safetensors",
354
- "model.layers.20.mlp.gate_proj.biases": "model.safetensors",
355
- "model.layers.20.mlp.gate_proj.scales": "model.safetensors",
356
- "model.layers.20.mlp.gate_proj.weight": "model.safetensors",
357
- "model.layers.20.mlp.up_proj.biases": "model.safetensors",
358
- "model.layers.20.mlp.up_proj.scales": "model.safetensors",
359
- "model.layers.20.mlp.up_proj.weight": "model.safetensors",
360
- "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
361
- "model.layers.20.self_attn.k_proj.bias": "model.safetensors",
362
- "model.layers.20.self_attn.k_proj.biases": "model.safetensors",
363
- "model.layers.20.self_attn.k_proj.scales": "model.safetensors",
364
- "model.layers.20.self_attn.k_proj.weight": "model.safetensors",
365
- "model.layers.20.self_attn.o_proj.biases": "model.safetensors",
366
- "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
367
- "model.layers.20.self_attn.o_proj.weight": "model.safetensors",
368
- "model.layers.20.self_attn.q_proj.bias": "model.safetensors",
369
- "model.layers.20.self_attn.q_proj.biases": "model.safetensors",
370
- "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
371
- "model.layers.20.self_attn.q_proj.weight": "model.safetensors",
372
- "model.layers.20.self_attn.v_proj.bias": "model.safetensors",
373
- "model.layers.20.self_attn.v_proj.biases": "model.safetensors",
374
- "model.layers.20.self_attn.v_proj.scales": "model.safetensors",
375
- "model.layers.20.self_attn.v_proj.weight": "model.safetensors",
376
- "model.layers.21.input_layernorm.weight": "model.safetensors",
377
- "model.layers.21.mlp.down_proj.biases": "model.safetensors",
378
- "model.layers.21.mlp.down_proj.scales": "model.safetensors",
379
- "model.layers.21.mlp.down_proj.weight": "model.safetensors",
380
- "model.layers.21.mlp.gate_proj.biases": "model.safetensors",
381
- "model.layers.21.mlp.gate_proj.scales": "model.safetensors",
382
- "model.layers.21.mlp.gate_proj.weight": "model.safetensors",
383
- "model.layers.21.mlp.up_proj.biases": "model.safetensors",
384
- "model.layers.21.mlp.up_proj.scales": "model.safetensors",
385
- "model.layers.21.mlp.up_proj.weight": "model.safetensors",
386
- "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
387
- "model.layers.21.self_attn.k_proj.bias": "model.safetensors",
388
- "model.layers.21.self_attn.k_proj.biases": "model.safetensors",
389
- "model.layers.21.self_attn.k_proj.scales": "model.safetensors",
390
- "model.layers.21.self_attn.k_proj.weight": "model.safetensors",
391
- "model.layers.21.self_attn.o_proj.biases": "model.safetensors",
392
- "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
393
- "model.layers.21.self_attn.o_proj.weight": "model.safetensors",
394
- "model.layers.21.self_attn.q_proj.bias": "model.safetensors",
395
- "model.layers.21.self_attn.q_proj.biases": "model.safetensors",
396
- "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
397
- "model.layers.21.self_attn.q_proj.weight": "model.safetensors",
398
- "model.layers.21.self_attn.v_proj.bias": "model.safetensors",
399
- "model.layers.21.self_attn.v_proj.biases": "model.safetensors",
400
- "model.layers.21.self_attn.v_proj.scales": "model.safetensors",
401
- "model.layers.21.self_attn.v_proj.weight": "model.safetensors",
402
- "model.layers.22.input_layernorm.weight": "model.safetensors",
403
- "model.layers.22.mlp.down_proj.biases": "model.safetensors",
404
- "model.layers.22.mlp.down_proj.scales": "model.safetensors",
405
- "model.layers.22.mlp.down_proj.weight": "model.safetensors",
406
- "model.layers.22.mlp.gate_proj.biases": "model.safetensors",
407
- "model.layers.22.mlp.gate_proj.scales": "model.safetensors",
408
- "model.layers.22.mlp.gate_proj.weight": "model.safetensors",
409
- "model.layers.22.mlp.up_proj.biases": "model.safetensors",
410
- "model.layers.22.mlp.up_proj.scales": "model.safetensors",
411
- "model.layers.22.mlp.up_proj.weight": "model.safetensors",
412
- "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
413
- "model.layers.22.self_attn.k_proj.bias": "model.safetensors",
414
- "model.layers.22.self_attn.k_proj.biases": "model.safetensors",
415
- "model.layers.22.self_attn.k_proj.scales": "model.safetensors",
416
- "model.layers.22.self_attn.k_proj.weight": "model.safetensors",
417
- "model.layers.22.self_attn.o_proj.biases": "model.safetensors",
418
- "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
419
- "model.layers.22.self_attn.o_proj.weight": "model.safetensors",
420
- "model.layers.22.self_attn.q_proj.bias": "model.safetensors",
421
- "model.layers.22.self_attn.q_proj.biases": "model.safetensors",
422
- "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
423
- "model.layers.22.self_attn.q_proj.weight": "model.safetensors",
424
- "model.layers.22.self_attn.v_proj.bias": "model.safetensors",
425
- "model.layers.22.self_attn.v_proj.biases": "model.safetensors",
426
- "model.layers.22.self_attn.v_proj.scales": "model.safetensors",
427
- "model.layers.22.self_attn.v_proj.weight": "model.safetensors",
428
- "model.layers.23.input_layernorm.weight": "model.safetensors",
429
- "model.layers.23.mlp.down_proj.biases": "model.safetensors",
430
- "model.layers.23.mlp.down_proj.scales": "model.safetensors",
431
- "model.layers.23.mlp.down_proj.weight": "model.safetensors",
432
- "model.layers.23.mlp.gate_proj.biases": "model.safetensors",
433
- "model.layers.23.mlp.gate_proj.scales": "model.safetensors",
434
- "model.layers.23.mlp.gate_proj.weight": "model.safetensors",
435
- "model.layers.23.mlp.up_proj.biases": "model.safetensors",
436
- "model.layers.23.mlp.up_proj.scales": "model.safetensors",
437
- "model.layers.23.mlp.up_proj.weight": "model.safetensors",
438
- "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
439
- "model.layers.23.self_attn.k_proj.bias": "model.safetensors",
440
- "model.layers.23.self_attn.k_proj.biases": "model.safetensors",
441
- "model.layers.23.self_attn.k_proj.scales": "model.safetensors",
442
- "model.layers.23.self_attn.k_proj.weight": "model.safetensors",
443
- "model.layers.23.self_attn.o_proj.biases": "model.safetensors",
444
- "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
445
- "model.layers.23.self_attn.o_proj.weight": "model.safetensors",
446
- "model.layers.23.self_attn.q_proj.bias": "model.safetensors",
447
- "model.layers.23.self_attn.q_proj.biases": "model.safetensors",
448
- "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
449
- "model.layers.23.self_attn.q_proj.weight": "model.safetensors",
450
- "model.layers.23.self_attn.v_proj.bias": "model.safetensors",
451
- "model.layers.23.self_attn.v_proj.biases": "model.safetensors",
452
- "model.layers.23.self_attn.v_proj.scales": "model.safetensors",
453
- "model.layers.23.self_attn.v_proj.weight": "model.safetensors",
454
- "model.layers.24.input_layernorm.weight": "model.safetensors",
455
- "model.layers.24.mlp.down_proj.biases": "model.safetensors",
456
- "model.layers.24.mlp.down_proj.scales": "model.safetensors",
457
- "model.layers.24.mlp.down_proj.weight": "model.safetensors",
458
- "model.layers.24.mlp.gate_proj.biases": "model.safetensors",
459
- "model.layers.24.mlp.gate_proj.scales": "model.safetensors",
460
- "model.layers.24.mlp.gate_proj.weight": "model.safetensors",
461
- "model.layers.24.mlp.up_proj.biases": "model.safetensors",
462
- "model.layers.24.mlp.up_proj.scales": "model.safetensors",
463
- "model.layers.24.mlp.up_proj.weight": "model.safetensors",
464
- "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
465
- "model.layers.24.self_attn.k_proj.bias": "model.safetensors",
466
- "model.layers.24.self_attn.k_proj.biases": "model.safetensors",
467
- "model.layers.24.self_attn.k_proj.scales": "model.safetensors",
468
- "model.layers.24.self_attn.k_proj.weight": "model.safetensors",
469
- "model.layers.24.self_attn.o_proj.biases": "model.safetensors",
470
- "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
471
- "model.layers.24.self_attn.o_proj.weight": "model.safetensors",
472
- "model.layers.24.self_attn.q_proj.bias": "model.safetensors",
473
- "model.layers.24.self_attn.q_proj.biases": "model.safetensors",
474
- "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
475
- "model.layers.24.self_attn.q_proj.weight": "model.safetensors",
476
- "model.layers.24.self_attn.v_proj.bias": "model.safetensors",
477
- "model.layers.24.self_attn.v_proj.biases": "model.safetensors",
478
- "model.layers.24.self_attn.v_proj.scales": "model.safetensors",
479
- "model.layers.24.self_attn.v_proj.weight": "model.safetensors",
480
- "model.layers.25.input_layernorm.weight": "model.safetensors",
481
- "model.layers.25.mlp.down_proj.biases": "model.safetensors",
482
- "model.layers.25.mlp.down_proj.scales": "model.safetensors",
483
- "model.layers.25.mlp.down_proj.weight": "model.safetensors",
484
- "model.layers.25.mlp.gate_proj.biases": "model.safetensors",
485
- "model.layers.25.mlp.gate_proj.scales": "model.safetensors",
486
- "model.layers.25.mlp.gate_proj.weight": "model.safetensors",
487
- "model.layers.25.mlp.up_proj.biases": "model.safetensors",
488
- "model.layers.25.mlp.up_proj.scales": "model.safetensors",
489
- "model.layers.25.mlp.up_proj.weight": "model.safetensors",
490
- "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
491
- "model.layers.25.self_attn.k_proj.bias": "model.safetensors",
492
- "model.layers.25.self_attn.k_proj.biases": "model.safetensors",
493
- "model.layers.25.self_attn.k_proj.scales": "model.safetensors",
494
- "model.layers.25.self_attn.k_proj.weight": "model.safetensors",
495
- "model.layers.25.self_attn.o_proj.biases": "model.safetensors",
496
- "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
497
- "model.layers.25.self_attn.o_proj.weight": "model.safetensors",
498
- "model.layers.25.self_attn.q_proj.bias": "model.safetensors",
499
- "model.layers.25.self_attn.q_proj.biases": "model.safetensors",
500
- "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
501
- "model.layers.25.self_attn.q_proj.weight": "model.safetensors",
502
- "model.layers.25.self_attn.v_proj.bias": "model.safetensors",
503
- "model.layers.25.self_attn.v_proj.biases": "model.safetensors",
504
- "model.layers.25.self_attn.v_proj.scales": "model.safetensors",
505
- "model.layers.25.self_attn.v_proj.weight": "model.safetensors",
506
- "model.layers.26.input_layernorm.weight": "model.safetensors",
507
- "model.layers.26.mlp.down_proj.biases": "model.safetensors",
508
- "model.layers.26.mlp.down_proj.scales": "model.safetensors",
509
- "model.layers.26.mlp.down_proj.weight": "model.safetensors",
510
- "model.layers.26.mlp.gate_proj.biases": "model.safetensors",
511
- "model.layers.26.mlp.gate_proj.scales": "model.safetensors",
512
- "model.layers.26.mlp.gate_proj.weight": "model.safetensors",
513
- "model.layers.26.mlp.up_proj.biases": "model.safetensors",
514
- "model.layers.26.mlp.up_proj.scales": "model.safetensors",
515
- "model.layers.26.mlp.up_proj.weight": "model.safetensors",
516
- "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
517
- "model.layers.26.self_attn.k_proj.bias": "model.safetensors",
518
- "model.layers.26.self_attn.k_proj.biases": "model.safetensors",
519
- "model.layers.26.self_attn.k_proj.scales": "model.safetensors",
520
- "model.layers.26.self_attn.k_proj.weight": "model.safetensors",
521
- "model.layers.26.self_attn.o_proj.biases": "model.safetensors",
522
- "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
523
- "model.layers.26.self_attn.o_proj.weight": "model.safetensors",
524
- "model.layers.26.self_attn.q_proj.bias": "model.safetensors",
525
- "model.layers.26.self_attn.q_proj.biases": "model.safetensors",
526
- "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
527
- "model.layers.26.self_attn.q_proj.weight": "model.safetensors",
528
- "model.layers.26.self_attn.v_proj.bias": "model.safetensors",
529
- "model.layers.26.self_attn.v_proj.biases": "model.safetensors",
530
- "model.layers.26.self_attn.v_proj.scales": "model.safetensors",
531
- "model.layers.26.self_attn.v_proj.weight": "model.safetensors",
532
- "model.layers.27.input_layernorm.weight": "model.safetensors",
533
- "model.layers.27.mlp.down_proj.biases": "model.safetensors",
534
- "model.layers.27.mlp.down_proj.scales": "model.safetensors",
535
- "model.layers.27.mlp.down_proj.weight": "model.safetensors",
536
- "model.layers.27.mlp.gate_proj.biases": "model.safetensors",
537
- "model.layers.27.mlp.gate_proj.scales": "model.safetensors",
538
- "model.layers.27.mlp.gate_proj.weight": "model.safetensors",
539
- "model.layers.27.mlp.up_proj.biases": "model.safetensors",
540
- "model.layers.27.mlp.up_proj.scales": "model.safetensors",
541
- "model.layers.27.mlp.up_proj.weight": "model.safetensors",
542
- "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
543
- "model.layers.27.self_attn.k_proj.bias": "model.safetensors",
544
- "model.layers.27.self_attn.k_proj.biases": "model.safetensors",
545
- "model.layers.27.self_attn.k_proj.scales": "model.safetensors",
546
- "model.layers.27.self_attn.k_proj.weight": "model.safetensors",
547
- "model.layers.27.self_attn.o_proj.biases": "model.safetensors",
548
- "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
549
- "model.layers.27.self_attn.o_proj.weight": "model.safetensors",
550
- "model.layers.27.self_attn.q_proj.bias": "model.safetensors",
551
- "model.layers.27.self_attn.q_proj.biases": "model.safetensors",
552
- "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
553
- "model.layers.27.self_attn.q_proj.weight": "model.safetensors",
554
- "model.layers.27.self_attn.v_proj.bias": "model.safetensors",
555
- "model.layers.27.self_attn.v_proj.biases": "model.safetensors",
556
- "model.layers.27.self_attn.v_proj.scales": "model.safetensors",
557
- "model.layers.27.self_attn.v_proj.weight": "model.safetensors",
558
- "model.layers.3.input_layernorm.weight": "model.safetensors",
559
- "model.layers.3.mlp.down_proj.biases": "model.safetensors",
560
- "model.layers.3.mlp.down_proj.scales": "model.safetensors",
561
- "model.layers.3.mlp.down_proj.weight": "model.safetensors",
562
- "model.layers.3.mlp.gate_proj.biases": "model.safetensors",
563
- "model.layers.3.mlp.gate_proj.scales": "model.safetensors",
564
- "model.layers.3.mlp.gate_proj.weight": "model.safetensors",
565
- "model.layers.3.mlp.up_proj.biases": "model.safetensors",
566
- "model.layers.3.mlp.up_proj.scales": "model.safetensors",
567
- "model.layers.3.mlp.up_proj.weight": "model.safetensors",
568
- "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
569
- "model.layers.3.self_attn.k_proj.bias": "model.safetensors",
570
- "model.layers.3.self_attn.k_proj.biases": "model.safetensors",
571
- "model.layers.3.self_attn.k_proj.scales": "model.safetensors",
572
- "model.layers.3.self_attn.k_proj.weight": "model.safetensors",
573
- "model.layers.3.self_attn.o_proj.biases": "model.safetensors",
574
- "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
575
- "model.layers.3.self_attn.o_proj.weight": "model.safetensors",
576
- "model.layers.3.self_attn.q_proj.bias": "model.safetensors",
577
- "model.layers.3.self_attn.q_proj.biases": "model.safetensors",
578
- "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
579
- "model.layers.3.self_attn.q_proj.weight": "model.safetensors",
580
- "model.layers.3.self_attn.v_proj.bias": "model.safetensors",
581
- "model.layers.3.self_attn.v_proj.biases": "model.safetensors",
582
- "model.layers.3.self_attn.v_proj.scales": "model.safetensors",
583
- "model.layers.3.self_attn.v_proj.weight": "model.safetensors",
584
- "model.layers.4.input_layernorm.weight": "model.safetensors",
585
- "model.layers.4.mlp.down_proj.biases": "model.safetensors",
586
- "model.layers.4.mlp.down_proj.scales": "model.safetensors",
587
- "model.layers.4.mlp.down_proj.weight": "model.safetensors",
588
- "model.layers.4.mlp.gate_proj.biases": "model.safetensors",
589
- "model.layers.4.mlp.gate_proj.scales": "model.safetensors",
590
- "model.layers.4.mlp.gate_proj.weight": "model.safetensors",
591
- "model.layers.4.mlp.up_proj.biases": "model.safetensors",
592
- "model.layers.4.mlp.up_proj.scales": "model.safetensors",
593
- "model.layers.4.mlp.up_proj.weight": "model.safetensors",
594
- "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
595
- "model.layers.4.self_attn.k_proj.bias": "model.safetensors",
596
- "model.layers.4.self_attn.k_proj.biases": "model.safetensors",
597
- "model.layers.4.self_attn.k_proj.scales": "model.safetensors",
598
- "model.layers.4.self_attn.k_proj.weight": "model.safetensors",
599
- "model.layers.4.self_attn.o_proj.biases": "model.safetensors",
600
- "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
601
- "model.layers.4.self_attn.o_proj.weight": "model.safetensors",
602
- "model.layers.4.self_attn.q_proj.bias": "model.safetensors",
603
- "model.layers.4.self_attn.q_proj.biases": "model.safetensors",
604
- "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
605
- "model.layers.4.self_attn.q_proj.weight": "model.safetensors",
606
- "model.layers.4.self_attn.v_proj.bias": "model.safetensors",
607
- "model.layers.4.self_attn.v_proj.biases": "model.safetensors",
608
- "model.layers.4.self_attn.v_proj.scales": "model.safetensors",
609
- "model.layers.4.self_attn.v_proj.weight": "model.safetensors",
610
- "model.layers.5.input_layernorm.weight": "model.safetensors",
611
- "model.layers.5.mlp.down_proj.biases": "model.safetensors",
612
- "model.layers.5.mlp.down_proj.scales": "model.safetensors",
613
- "model.layers.5.mlp.down_proj.weight": "model.safetensors",
614
- "model.layers.5.mlp.gate_proj.biases": "model.safetensors",
615
- "model.layers.5.mlp.gate_proj.scales": "model.safetensors",
616
- "model.layers.5.mlp.gate_proj.weight": "model.safetensors",
617
- "model.layers.5.mlp.up_proj.biases": "model.safetensors",
618
- "model.layers.5.mlp.up_proj.scales": "model.safetensors",
619
- "model.layers.5.mlp.up_proj.weight": "model.safetensors",
620
- "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
621
- "model.layers.5.self_attn.k_proj.bias": "model.safetensors",
622
- "model.layers.5.self_attn.k_proj.biases": "model.safetensors",
623
- "model.layers.5.self_attn.k_proj.scales": "model.safetensors",
624
- "model.layers.5.self_attn.k_proj.weight": "model.safetensors",
625
- "model.layers.5.self_attn.o_proj.biases": "model.safetensors",
626
- "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
627
- "model.layers.5.self_attn.o_proj.weight": "model.safetensors",
628
- "model.layers.5.self_attn.q_proj.bias": "model.safetensors",
629
- "model.layers.5.self_attn.q_proj.biases": "model.safetensors",
630
- "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
631
- "model.layers.5.self_attn.q_proj.weight": "model.safetensors",
632
- "model.layers.5.self_attn.v_proj.bias": "model.safetensors",
633
- "model.layers.5.self_attn.v_proj.biases": "model.safetensors",
634
- "model.layers.5.self_attn.v_proj.scales": "model.safetensors",
635
- "model.layers.5.self_attn.v_proj.weight": "model.safetensors",
636
- "model.layers.6.input_layernorm.weight": "model.safetensors",
637
- "model.layers.6.mlp.down_proj.biases": "model.safetensors",
638
- "model.layers.6.mlp.down_proj.scales": "model.safetensors",
639
- "model.layers.6.mlp.down_proj.weight": "model.safetensors",
640
- "model.layers.6.mlp.gate_proj.biases": "model.safetensors",
641
- "model.layers.6.mlp.gate_proj.scales": "model.safetensors",
642
- "model.layers.6.mlp.gate_proj.weight": "model.safetensors",
643
- "model.layers.6.mlp.up_proj.biases": "model.safetensors",
644
- "model.layers.6.mlp.up_proj.scales": "model.safetensors",
645
- "model.layers.6.mlp.up_proj.weight": "model.safetensors",
646
- "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
647
- "model.layers.6.self_attn.k_proj.bias": "model.safetensors",
648
- "model.layers.6.self_attn.k_proj.biases": "model.safetensors",
649
- "model.layers.6.self_attn.k_proj.scales": "model.safetensors",
650
- "model.layers.6.self_attn.k_proj.weight": "model.safetensors",
651
- "model.layers.6.self_attn.o_proj.biases": "model.safetensors",
652
- "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
653
- "model.layers.6.self_attn.o_proj.weight": "model.safetensors",
654
- "model.layers.6.self_attn.q_proj.bias": "model.safetensors",
655
- "model.layers.6.self_attn.q_proj.biases": "model.safetensors",
656
- "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
657
- "model.layers.6.self_attn.q_proj.weight": "model.safetensors",
658
- "model.layers.6.self_attn.v_proj.bias": "model.safetensors",
659
- "model.layers.6.self_attn.v_proj.biases": "model.safetensors",
660
- "model.layers.6.self_attn.v_proj.scales": "model.safetensors",
661
- "model.layers.6.self_attn.v_proj.weight": "model.safetensors",
662
- "model.layers.7.input_layernorm.weight": "model.safetensors",
663
- "model.layers.7.mlp.down_proj.biases": "model.safetensors",
664
- "model.layers.7.mlp.down_proj.scales": "model.safetensors",
665
- "model.layers.7.mlp.down_proj.weight": "model.safetensors",
666
- "model.layers.7.mlp.gate_proj.biases": "model.safetensors",
667
- "model.layers.7.mlp.gate_proj.scales": "model.safetensors",
668
- "model.layers.7.mlp.gate_proj.weight": "model.safetensors",
669
- "model.layers.7.mlp.up_proj.biases": "model.safetensors",
670
- "model.layers.7.mlp.up_proj.scales": "model.safetensors",
671
- "model.layers.7.mlp.up_proj.weight": "model.safetensors",
672
- "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
673
- "model.layers.7.self_attn.k_proj.bias": "model.safetensors",
674
- "model.layers.7.self_attn.k_proj.biases": "model.safetensors",
675
- "model.layers.7.self_attn.k_proj.scales": "model.safetensors",
676
- "model.layers.7.self_attn.k_proj.weight": "model.safetensors",
677
- "model.layers.7.self_attn.o_proj.biases": "model.safetensors",
678
- "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
679
- "model.layers.7.self_attn.o_proj.weight": "model.safetensors",
680
- "model.layers.7.self_attn.q_proj.bias": "model.safetensors",
681
- "model.layers.7.self_attn.q_proj.biases": "model.safetensors",
682
- "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
683
- "model.layers.7.self_attn.q_proj.weight": "model.safetensors",
684
- "model.layers.7.self_attn.v_proj.bias": "model.safetensors",
685
- "model.layers.7.self_attn.v_proj.biases": "model.safetensors",
686
- "model.layers.7.self_attn.v_proj.scales": "model.safetensors",
687
- "model.layers.7.self_attn.v_proj.weight": "model.safetensors",
688
- "model.layers.8.input_layernorm.weight": "model.safetensors",
689
- "model.layers.8.mlp.down_proj.biases": "model.safetensors",
690
- "model.layers.8.mlp.down_proj.scales": "model.safetensors",
691
- "model.layers.8.mlp.down_proj.weight": "model.safetensors",
692
- "model.layers.8.mlp.gate_proj.biases": "model.safetensors",
693
- "model.layers.8.mlp.gate_proj.scales": "model.safetensors",
694
- "model.layers.8.mlp.gate_proj.weight": "model.safetensors",
695
- "model.layers.8.mlp.up_proj.biases": "model.safetensors",
696
- "model.layers.8.mlp.up_proj.scales": "model.safetensors",
697
- "model.layers.8.mlp.up_proj.weight": "model.safetensors",
698
- "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
699
- "model.layers.8.self_attn.k_proj.bias": "model.safetensors",
700
- "model.layers.8.self_attn.k_proj.biases": "model.safetensors",
701
- "model.layers.8.self_attn.k_proj.scales": "model.safetensors",
702
- "model.layers.8.self_attn.k_proj.weight": "model.safetensors",
703
- "model.layers.8.self_attn.o_proj.biases": "model.safetensors",
704
- "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
705
- "model.layers.8.self_attn.o_proj.weight": "model.safetensors",
706
- "model.layers.8.self_attn.q_proj.bias": "model.safetensors",
707
- "model.layers.8.self_attn.q_proj.biases": "model.safetensors",
708
- "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
709
- "model.layers.8.self_attn.q_proj.weight": "model.safetensors",
710
- "model.layers.8.self_attn.v_proj.bias": "model.safetensors",
711
- "model.layers.8.self_attn.v_proj.biases": "model.safetensors",
712
- "model.layers.8.self_attn.v_proj.scales": "model.safetensors",
713
- "model.layers.8.self_attn.v_proj.weight": "model.safetensors",
714
- "model.layers.9.input_layernorm.weight": "model.safetensors",
715
- "model.layers.9.mlp.down_proj.biases": "model.safetensors",
716
- "model.layers.9.mlp.down_proj.scales": "model.safetensors",
717
- "model.layers.9.mlp.down_proj.weight": "model.safetensors",
718
- "model.layers.9.mlp.gate_proj.biases": "model.safetensors",
719
- "model.layers.9.mlp.gate_proj.scales": "model.safetensors",
720
- "model.layers.9.mlp.gate_proj.weight": "model.safetensors",
721
- "model.layers.9.mlp.up_proj.biases": "model.safetensors",
722
- "model.layers.9.mlp.up_proj.scales": "model.safetensors",
723
- "model.layers.9.mlp.up_proj.weight": "model.safetensors",
724
- "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
725
- "model.layers.9.self_attn.k_proj.bias": "model.safetensors",
726
- "model.layers.9.self_attn.k_proj.biases": "model.safetensors",
727
- "model.layers.9.self_attn.k_proj.scales": "model.safetensors",
728
- "model.layers.9.self_attn.k_proj.weight": "model.safetensors",
729
- "model.layers.9.self_attn.o_proj.biases": "model.safetensors",
730
- "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
731
- "model.layers.9.self_attn.o_proj.weight": "model.safetensors",
732
- "model.layers.9.self_attn.q_proj.bias": "model.safetensors",
733
- "model.layers.9.self_attn.q_proj.biases": "model.safetensors",
734
- "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
735
- "model.layers.9.self_attn.q_proj.weight": "model.safetensors",
736
- "model.layers.9.self_attn.v_proj.bias": "model.safetensors",
737
- "model.layers.9.self_attn.v_proj.biases": "model.safetensors",
738
- "model.layers.9.self_attn.v_proj.scales": "model.safetensors",
739
- "model.layers.9.self_attn.v_proj.weight": "model.safetensors",
740
- "model.norm.weight": "model.safetensors"
741
- }
742
- }
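The file removed above is a standard safetensors shard index: its `weight_map` ties every tensor name to the shard file that stores it, and because these are MLX quantized weights, each linear layer appears as a `weight`/`scales`/`biases` triplet. A minimal sketch of reading such an index follows; the path is illustrative, not a file kept in this repo.

```python
import json
from collections import Counter

# Sketch: resolve tensors to shards via the weight_map of a safetensors
# index like the one deleted above. The path below is illustrative.
with open("DeepSeek-R1-Distill-Qwen-7B-4bit/model.safetensors.index.json") as f:
    index = json.load(f)

weight_map = index["weight_map"]  # tensor name -> shard filename

# Quantized MLX linears are stored as a weight/scales/biases triplet.
print(weight_map["model.layers.1.mlp.up_proj.weight"])
print(weight_map["model.layers.1.mlp.up_proj.scales"])

# Single-shard exports map every tensor to "model.safetensors",
# as in the index above.
print(Counter(weight_map.values()))
```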
 
DeepSeek-R1-Distill-Qwen-7B-3bit/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<|begin▁of▁sentence|>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "<|end▁of▁sentence|>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": {
17
- "content": "<|end▁of▁sentence|>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- }
23
- }
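The `special_tokens_map.json` deleted here pins the DeepSeek BOS/EOS markers and reuses the EOS token for padding. A small sketch of inspecting it; the folder name is illustrative, since every quant variant carried an identical copy.

```python
import json

# Sketch: read the special-token mapping deleted above; the folder name
# is illustrative, every quant variant carried an identical copy.
with open("DeepSeek-R1-Distill-Qwen-7B-3bit/special_tokens_map.json") as f:
    specials = json.load(f)

bos = specials["bos_token"]["content"]  # "<|begin▁of▁sentence|>"
eos = specials["eos_token"]["content"]  # "<|end▁of▁sentence|>"

# No dedicated pad token: padding reuses the EOS token.
assert specials["pad_token"]["content"] == eos
print(bos, eos)
```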
 
DeepSeek-R1-Distill-Qwen-7B-3bit/tokenizer.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
3
- size 11422778
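What remains of `tokenizer.json` in the diff is a Git LFS pointer (spec v1), not the tokenizer itself: one line naming the spec, the SHA-256 of the real blob, and its size in bytes (about 11 MB here). A short sketch of parsing such a pointer:

```python
# Sketch: parse a git-lfs pointer stub like the deleted tokenizer.json
# above (version / oid sha256:<hex> / size <bytes>).
def parse_lfs_pointer(text: str) -> dict:
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "sha256": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
size 11422778"""

info = parse_lfs_pointer(pointer)
print(f"{info['size_bytes'] / 1e6:.1f} MB blob, sha256 {info['sha256'][:12]}")
```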
 
DeepSeek-R1-Distill-Qwen-7B-3bit/tokenizer_config.json DELETED
@@ -1,195 +0,0 @@
1
- {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "add_prefix_space": null,
5
- "added_tokens_decoder": {
6
- "151643": {
7
- "content": "<|end▁of▁sentence|>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false,
12
- "special": true
13
- },
14
- "151644": {
15
- "content": "<|User|>",
16
- "lstrip": false,
17
- "normalized": false,
18
- "rstrip": false,
19
- "single_word": false,
20
- "special": false
21
- },
22
- "151645": {
23
- "content": "<|Assistant|>",
24
- "lstrip": false,
25
- "normalized": false,
26
- "rstrip": false,
27
- "single_word": false,
28
- "special": false
29
- },
30
- "151646": {
31
- "content": "<|begin▁of▁sentence|>",
32
- "lstrip": false,
33
- "normalized": false,
34
- "rstrip": false,
35
- "single_word": false,
36
- "special": true
37
- },
38
- "151647": {
39
- "content": "<|EOT|>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false,
44
- "special": false
45
- },
46
- "151648": {
47
- "content": "<think>",
48
- "lstrip": false,
49
- "normalized": false,
50
- "rstrip": false,
51
- "single_word": false,
52
- "special": false
53
- },
54
- "151649": {
55
- "content": "</think>",
56
- "lstrip": false,
57
- "normalized": false,
58
- "rstrip": false,
59
- "single_word": false,
60
- "special": false
61
- },
62
- "151650": {
63
- "content": "<|quad_start|>",
64
- "lstrip": false,
65
- "normalized": false,
66
- "rstrip": false,
67
- "single_word": false,
68
- "special": true
69
- },
70
- "151651": {
71
- "content": "<|quad_end|>",
72
- "lstrip": false,
73
- "normalized": false,
74
- "rstrip": false,
75
- "single_word": false,
76
- "special": true
77
- },
78
- "151652": {
79
- "content": "<|vision_start|>",
80
- "lstrip": false,
81
- "normalized": false,
82
- "rstrip": false,
83
- "single_word": false,
84
- "special": true
85
- },
86
- "151653": {
87
- "content": "<|vision_end|>",
88
- "lstrip": false,
89
- "normalized": false,
90
- "rstrip": false,
91
- "single_word": false,
92
- "special": true
93
- },
94
- "151654": {
95
- "content": "<|vision_pad|>",
96
- "lstrip": false,
97
- "normalized": false,
98
- "rstrip": false,
99
- "single_word": false,
100
- "special": true
101
- },
102
- "151655": {
103
- "content": "<|image_pad|>",
104
- "lstrip": false,
105
- "normalized": false,
106
- "rstrip": false,
107
- "single_word": false,
108
- "special": true
109
- },
110
- "151656": {
111
- "content": "<|video_pad|>",
112
- "lstrip": false,
113
- "normalized": false,
114
- "rstrip": false,
115
- "single_word": false,
116
- "special": true
117
- },
118
- "151657": {
119
- "content": "<tool_call>",
120
- "lstrip": false,
121
- "normalized": false,
122
- "rstrip": false,
123
- "single_word": false,
124
- "special": false
125
- },
126
- "151658": {
127
- "content": "</tool_call>",
128
- "lstrip": false,
129
- "normalized": false,
130
- "rstrip": false,
131
- "single_word": false,
132
- "special": false
133
- },
134
- "151659": {
135
- "content": "<|fim_prefix|>",
136
- "lstrip": false,
137
- "normalized": false,
138
- "rstrip": false,
139
- "single_word": false,
140
- "special": false
141
- },
142
- "151660": {
143
- "content": "<|fim_middle|>",
144
- "lstrip": false,
145
- "normalized": false,
146
- "rstrip": false,
147
- "single_word": false,
148
- "special": false
149
- },
150
- "151661": {
151
- "content": "<|fim_suffix|>",
152
- "lstrip": false,
153
- "normalized": false,
154
- "rstrip": false,
155
- "single_word": false,
156
- "special": false
157
- },
158
- "151662": {
159
- "content": "<|fim_pad|>",
160
- "lstrip": false,
161
- "normalized": false,
162
- "rstrip": false,
163
- "single_word": false,
164
- "special": false
165
- },
166
- "151663": {
167
- "content": "<|repo_name|>",
168
- "lstrip": false,
169
- "normalized": false,
170
- "rstrip": false,
171
- "single_word": false,
172
- "special": false
173
- },
174
- "151664": {
175
- "content": "<|file_sep|>",
176
- "lstrip": false,
177
- "normalized": false,
178
- "rstrip": false,
179
- "single_word": false,
180
- "special": false
181
- }
182
- },
183
- "bos_token": "<|begin▁of▁sentence|>",
184
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin��>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
185
- "clean_up_tokenization_spaces": false,
186
- "eos_token": "<|end▁of▁sentence|>",
187
- "extra_special_tokens": {},
188
- "legacy": true,
189
- "model_max_length": 16384,
190
- "pad_token": "<|end▁of▁sentence|>",
191
- "sp_model_kwargs": {},
192
- "tokenizer_class": "LlamaTokenizerFast",
193
- "unk_token": null,
194
- "use_default_system_prompt": false
195
- }
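The chat template in the tokenizer config above prepends BOS plus any system prompt, wraps turns in `<|User|>`/`<|Assistant|>`, drops everything up to `</think>` from earlier assistant turns, and with `add_generation_prompt` ends the prompt with `<|Assistant|><think>\n` so the model starts by reasoning. A sketch of rendering it through transformers; the hub repo id is an assumption, and any copy of this tokenizer behaves the same.

```python
from transformers import AutoTokenizer

# Sketch: render the chat template carried by the tokenizer_config above.
# The hub repo id is an assumption; any copy of this tokenizer works.
tok = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")

messages = [
    {"role": "system", "content": "Answer concisely."},
    {"role": "user", "content": "What is 12 * 12?"},
]

prompt = tok.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,  # appends "<|Assistant|><think>\n"
)
print(prompt)
# <|begin▁of▁sentence|>Answer concisely.<|User|>What is 12 * 12?<|Assistant|><think>
```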
 
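The next deletion, the `4,6_mixed` config, shows how MLX records a mixed-precision recipe directly in `config.json`: a top-level `group_size` with `bits: null`, then explicit per-module entries (6-bit for the more sensitive `v_proj`/`down_proj` in some layers, 4-bit elsewhere, with `rope` and the layernorms left unquantized). A hedged sketch of producing such a build; the `quant_predicate` hook and its exact signature are assumptions about recent mlx-lm releases, and the real recipes vary the bit width per layer index, as the config itself shows.

```python
# Sketch only: assumes mlx-lm's convert() exposes a quant_predicate hook
# (recent releases). Returning a dict per module overrides group_size/bits;
# the shipped configs vary bits by layer index, which this simplifies.
from mlx_lm import convert

def mixed_4_6(path, module, config):
    if path.endswith(("self_attn.v_proj", "mlp.down_proj")):
        return {"group_size": 64, "bits": 6}  # sensitive projections at 6-bit
    return {"group_size": 64, "bits": 4}      # everything else at 4-bit

convert(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    mlx_path="DeepSeek-R1-Distill-Qwen-7B-4,6_mixed",
    quantize=True,
    q_group_size=64,
    quant_predicate=mixed_4_6,
)
```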
DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/config.json DELETED
@@ -1,1790 +0,0 @@
- {
-   "architectures": ["Qwen2ForCausalLM"],
-   "attention_dropout": 0.0,
-   "bos_token_id": 151643,
-   "eos_token_id": 151643,
-   "hidden_act": "silu",
-   "hidden_size": 3584,
-   "initializer_range": 0.02,
-   "intermediate_size": 18944,
-   "max_position_embeddings": 131072,
-   "max_window_layers": 28,
-   "model_type": "qwen2",
-   "num_attention_heads": 28,
-   "num_hidden_layers": 28,
-   "num_key_value_heads": 4,
-   "quantization": {
-     "group_size": 64,
-     "bits": null,
-     "model.embed_tokens": {"group_size": 64, "bits": 4},
-     "model.layers.0.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.0.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.self_attn.rope": false,
-     "model.layers.0.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.0.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.input_layernorm": false,
-     "model.layers.0.post_attention_layernorm": false,
-     "model.layers.1.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.1.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.self_attn.rope": false,
-     "model.layers.1.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.1.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.input_layernorm": false,
-     "model.layers.1.post_attention_layernorm": false,
-     "model.layers.2.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.2.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.self_attn.rope": false,
-     "model.layers.2.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.2.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.input_layernorm": false,
-     "model.layers.2.post_attention_layernorm": false,
-     "model.layers.3.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.self_attn.rope": false,
-     "model.layers.3.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.input_layernorm": false,
-     "model.layers.3.post_attention_layernorm": false,
-     "model.layers.4.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.self_attn.rope": false,
-     "model.layers.4.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.input_layernorm": false,
-     "model.layers.4.post_attention_layernorm": false,
-     "model.layers.5.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.5.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.self_attn.rope": false,
-     "model.layers.5.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.5.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.input_layernorm": false,
-     "model.layers.5.post_attention_layernorm": false,
-     "model.layers.6.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.self_attn.rope": false,
-     "model.layers.6.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.input_layernorm": false,
-     "model.layers.6.post_attention_layernorm": false,
-     "model.layers.7.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.self_attn.rope": false,
-     "model.layers.7.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.input_layernorm": false,
-     "model.layers.7.post_attention_layernorm": false,
-     "model.layers.8.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.8.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.self_attn.rope": false,
-     "model.layers.8.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.8.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.input_layernorm": false,
-     "model.layers.8.post_attention_layernorm": false,
-     "model.layers.9.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.self_attn.rope": false,
-     "model.layers.9.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.input_layernorm": false,
-     "model.layers.9.post_attention_layernorm": false,
-     "model.layers.10.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.self_attn.rope": false,
-     "model.layers.10.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.input_layernorm": false,
-     "model.layers.10.post_attention_layernorm": false,
-     "model.layers.11.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.11.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.self_attn.rope": false,
-     "model.layers.11.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.11.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.input_layernorm": false,
-     "model.layers.11.post_attention_layernorm": false,
-     "model.layers.12.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.self_attn.rope": false,
-     "model.layers.12.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.input_layernorm": false,
-     "model.layers.12.post_attention_layernorm": false,
-     "model.layers.13.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.self_attn.rope": false,
-     "model.layers.13.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.input_layernorm": false,
-     "model.layers.13.post_attention_layernorm": false,
-     "model.layers.14.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.14.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.self_attn.rope": false,
-     "model.layers.14.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.14.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.input_layernorm": false,
-     "model.layers.14.post_attention_layernorm": false,
-     "model.layers.15.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.self_attn.rope": false,
-     "model.layers.15.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.input_layernorm": false,
-     "model.layers.15.post_attention_layernorm": false,
-     "model.layers.16.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.self_attn.rope": false,
-     "model.layers.16.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.input_layernorm": false,
-     "model.layers.16.post_attention_layernorm": false,
-     "model.layers.17.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.17.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.self_attn.rope": false,
-     "model.layers.17.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.17.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.input_layernorm": false,
-     "model.layers.17.post_attention_layernorm": false,
-     "model.layers.18.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.self_attn.rope": false,
-     "model.layers.18.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.input_layernorm": false,
-     "model.layers.18.post_attention_layernorm": false,
-     "model.layers.19.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.self_attn.rope": false,
-     "model.layers.19.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.input_layernorm": false,
-     "model.layers.19.post_attention_layernorm": false,
-     "model.layers.20.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.20.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.self_attn.rope": false,
-     "model.layers.20.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.20.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.input_layernorm": false,
-     "model.layers.20.post_attention_layernorm": false,
-     "model.layers.21.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.self_attn.rope": false,
-     "model.layers.21.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.input_layernorm": false,
-     "model.layers.21.post_attention_layernorm": false,
-     "model.layers.22.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.self_attn.rope": false,
-     "model.layers.22.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.input_layernorm": false,
-     "model.layers.22.post_attention_layernorm": false,
-     "model.layers.23.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.23.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.self_attn.rope": false,
-     "model.layers.23.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.23.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.input_layernorm": false,
-     "model.layers.23.post_attention_layernorm": false,
-     "model.layers.24.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.24.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.self_attn.rope": false,
-     "model.layers.24.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.24.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.input_layernorm": false,
-     "model.layers.24.post_attention_layernorm": false,
-     "model.layers.25.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.25.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.self_attn.rope": false,
-     "model.layers.25.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.25.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.input_layernorm": false,
-     "model.layers.25.post_attention_layernorm": false,
-     "model.layers.26.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.26.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.self_attn.rope": false,
-     "model.layers.26.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.26.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.input_layernorm": false,
-     "model.layers.26.post_attention_layernorm": false,
-     "model.layers.27.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.27.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.self_attn.rope": false,
-     "model.layers.27.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.27.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.input_layernorm": false,
-     "model.layers.27.post_attention_layernorm": false,
-     "model.norm": false,
-     "lm_head": {"group_size": 64, "bits": 6}
-   },
-   "quantization_config": {
-     "group_size": 64,
-     "bits": null,
-     "model.embed_tokens": {"group_size": 64, "bits": 4},
-     "model.layers.0.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.0.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.self_attn.rope": false,
-     "model.layers.0.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.0.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.0.input_layernorm": false,
-     "model.layers.0.post_attention_layernorm": false,
-     "model.layers.1.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.1.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.self_attn.rope": false,
-     "model.layers.1.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.1.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.1.input_layernorm": false,
-     "model.layers.1.post_attention_layernorm": false,
-     "model.layers.2.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.2.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.self_attn.rope": false,
-     "model.layers.2.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.2.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.2.input_layernorm": false,
-     "model.layers.2.post_attention_layernorm": false,
-     "model.layers.3.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.self_attn.rope": false,
-     "model.layers.3.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.3.input_layernorm": false,
-     "model.layers.3.post_attention_layernorm": false,
-     "model.layers.4.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.self_attn.rope": false,
-     "model.layers.4.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.4.input_layernorm": false,
-     "model.layers.4.post_attention_layernorm": false,
-     "model.layers.5.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.5.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.self_attn.rope": false,
-     "model.layers.5.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.5.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.5.input_layernorm": false,
-     "model.layers.5.post_attention_layernorm": false,
-     "model.layers.6.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.self_attn.rope": false,
-     "model.layers.6.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.6.input_layernorm": false,
-     "model.layers.6.post_attention_layernorm": false,
-     "model.layers.7.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.self_attn.rope": false,
-     "model.layers.7.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.7.input_layernorm": false,
-     "model.layers.7.post_attention_layernorm": false,
-     "model.layers.8.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.8.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.self_attn.rope": false,
-     "model.layers.8.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.8.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.8.input_layernorm": false,
-     "model.layers.8.post_attention_layernorm": false,
-     "model.layers.9.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.self_attn.rope": false,
-     "model.layers.9.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.9.input_layernorm": false,
-     "model.layers.9.post_attention_layernorm": false,
-     "model.layers.10.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.self_attn.rope": false,
-     "model.layers.10.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.10.input_layernorm": false,
-     "model.layers.10.post_attention_layernorm": false,
-     "model.layers.11.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.11.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.self_attn.rope": false,
-     "model.layers.11.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.11.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.11.input_layernorm": false,
-     "model.layers.11.post_attention_layernorm": false,
-     "model.layers.12.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.self_attn.rope": false,
-     "model.layers.12.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.12.input_layernorm": false,
-     "model.layers.12.post_attention_layernorm": false,
-     "model.layers.13.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.self_attn.rope": false,
-     "model.layers.13.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.13.input_layernorm": false,
-     "model.layers.13.post_attention_layernorm": false,
-     "model.layers.14.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.14.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.self_attn.rope": false,
-     "model.layers.14.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.14.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.14.input_layernorm": false,
-     "model.layers.14.post_attention_layernorm": false,
-     "model.layers.15.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.self_attn.rope": false,
-     "model.layers.15.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.15.input_layernorm": false,
-     "model.layers.15.post_attention_layernorm": false,
-     "model.layers.16.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.self_attn.rope": false,
-     "model.layers.16.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.16.input_layernorm": false,
-     "model.layers.16.post_attention_layernorm": false,
-     "model.layers.17.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.17.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.self_attn.rope": false,
-     "model.layers.17.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.17.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.17.input_layernorm": false,
-     "model.layers.17.post_attention_layernorm": false,
-     "model.layers.18.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.self_attn.rope": false,
-     "model.layers.18.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.18.input_layernorm": false,
-     "model.layers.18.post_attention_layernorm": false,
-     "model.layers.19.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.self_attn.rope": false,
-     "model.layers.19.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.19.input_layernorm": false,
-     "model.layers.19.post_attention_layernorm": false,
-     "model.layers.20.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.20.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.self_attn.rope": false,
-     "model.layers.20.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.20.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.20.input_layernorm": false,
-     "model.layers.20.post_attention_layernorm": false,
-     "model.layers.21.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.self_attn.rope": false,
-     "model.layers.21.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.21.input_layernorm": false,
-     "model.layers.21.post_attention_layernorm": false,
-     "model.layers.22.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.self_attn.v_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.self_attn.rope": false,
-     "model.layers.22.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.mlp.down_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.22.input_layernorm": false,
-     "model.layers.22.post_attention_layernorm": false,
-     "model.layers.23.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.23.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.self_attn.rope": false,
-     "model.layers.23.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.23.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.23.input_layernorm": false,
-     "model.layers.23.post_attention_layernorm": false,
-     "model.layers.24.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.24.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.self_attn.rope": false,
-     "model.layers.24.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.24.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.24.input_layernorm": false,
-     "model.layers.24.post_attention_layernorm": false,
-     "model.layers.25.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.25.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.self_attn.rope": false,
-     "model.layers.25.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.25.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.25.input_layernorm": false,
-     "model.layers.25.post_attention_layernorm": false,
-     "model.layers.26.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.26.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.self_attn.rope": false,
-     "model.layers.26.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.26.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.26.input_layernorm": false,
-     "model.layers.26.post_attention_layernorm": false,
-     "model.layers.27.self_attn.q_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.self_attn.k_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.self_attn.v_proj": {"group_size": 64, "bits": 6},
-     "model.layers.27.self_attn.o_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.self_attn.rope": false,
-     "model.layers.27.mlp.gate_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.mlp.down_proj": {"group_size": 64, "bits": 6},
-     "model.layers.27.mlp.up_proj": {"group_size": 64, "bits": 4},
-     "model.layers.27.input_layernorm": false,
-     "model.layers.27.post_attention_layernorm": false,
-     "model.norm": false,
-     "lm_head": {"group_size": 64, "bits": 6}
-   },
-   "rms_norm_eps": 1e-06,
-   "rope_theta": 10000,
-   "sliding_window": 4096,
-   "tie_word_embeddings": false,
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.44.0",
-   "use_cache": true,
-   "use_mrope": false,
-   "use_sliding_window": false,
-   "vocab_size": 152064
- }
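
A note for reviewers: the deleted 4,6_mixed recipe above is regular enough to state in one line. Everything is quantized to 4 bits with group size 64, except v_proj and down_proj in layers 0-2, 5, 8, 11, 14, 17, 20, and 23-27, plus lm_head, which get 6 bits; rope and the norm layers stay unquantized, and quantization_config duplicates quantization. Below is a minimal sketch of regenerating such a config. It assumes a recent mlx_lm whose convert() accepts a quant_predicate callable; the mixed_4_6 predicate here is inferred from the config itself and may differ from whatever script actually produced this repo.

```python
# Sketch: reproduce the 4,6_mixed layout with mlx_lm (assumption: a recent
# mlx_lm whose convert() accepts a quant_predicate callable).
from mlx_lm import convert

GROUP_SIZE, LOW_BITS, HIGH_BITS = 64, 4, 6

def mixed_4_6(path: str, module, config: dict):
    """Per-module quantization settings matching the deleted config above."""
    parts = path.split(".")
    index = int(parts[2]) if len(parts) > 2 and parts[2].isdigit() else 0
    n = config["num_hidden_layers"]  # 28 for this model
    # First n//8 layers, last n//8 layers, and every third layer in between
    # keep more bits in the most quantization-sensitive projections.
    sensitive = index < n // 8 or index >= 7 * n // 8 or index % 3 == 2
    if "lm_head" in path or (sensitive and ("v_proj" in path or "down_proj" in path)):
        return {"group_size": GROUP_SIZE, "bits": HIGH_BITS}
    return {"group_size": GROUP_SIZE, "bits": LOW_BITS}

convert(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    mlx_path="DeepSeek-R1-Distill-Qwen-7B-4,6_mixed",
    quantize=True,
    q_group_size=GROUP_SIZE,
    q_bits=LOW_BITS,
    quant_predicate=mixed_4_6,
)
```

With n = 28 the sensitive set works out to layers 0-2, 5, 8, 11, 14, 17, 20, and 23-27, which is exactly the 6-bit set recorded in the config.
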
DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:48271047062e3580bbc7eebf3fb956ac8a2d9aca9f22f19d12a47194a7a7aabf
- size 4664651681
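
The model.safetensors files in this repo are stored with Git LFS, so the hunk above deletes only the three-line pointer file (spec version, sha256 oid, and byte size, here about 4.66 GB), not the tensor data itself. A short sketch of reading such a pointer follows; parse_lfs_pointer is a hypothetical helper, not part of mlx_lm or huggingface_hub.

```python
# Sketch: parse a Git LFS pointer file ("key value" per line) into a dict.
# parse_lfs_pointer is a hypothetical helper for illustration only.
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    fields = dict(
        line.split(" ", 1)
        for line in Path(path).read_text().splitlines()
        if line.strip()
    )
    fields["size"] = int(fields["size"])  # e.g. 4664651681 bytes (~4.66 GB)
    return fields

# On a pointer-only checkout, parse_lfs_pointer("model.safetensors") returns
# {'version': 'https://git-lfs.github.com/spec/v1',
#  'oid': 'sha256:4827...aabf', 'size': 4664651681}
```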
 
 
 
 
DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/model.safetensors.index.json DELETED
@@ -1,742 +0,0 @@
- {
-   "metadata": {
-     "total_size": 4664568832
-   },
-   "weight_map": {
-     "lm_head.biases": "model.safetensors",
-     "lm_head.scales": "model.safetensors",
-     "lm_head.weight": "model.safetensors",
-     "model.embed_tokens.biases": "model.safetensors",
-     "model.embed_tokens.scales": "model.safetensors",
-     "model.embed_tokens.weight": "model.safetensors",
-     "model.layers.0.input_layernorm.weight": "model.safetensors",
-     "model.layers.0.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.0.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.0.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.0.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.0.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.0.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.0.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.0.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.0.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.0.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.0.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.0.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.0.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.0.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.0.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.0.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.0.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.0.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.0.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.0.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.1.input_layernorm.weight": "model.safetensors",
-     "model.layers.1.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.1.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.1.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.1.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.1.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.1.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.1.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.1.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.1.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.1.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.1.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.1.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.1.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.1.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.1.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.1.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.1.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.1.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.1.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.1.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.1.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.1.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.10.input_layernorm.weight": "model.safetensors",
-     "model.layers.10.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.10.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.10.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.10.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.10.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.10.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.10.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.10.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.10.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.10.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.10.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.10.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.10.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.10.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.10.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.10.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.10.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.10.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.10.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.10.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.10.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.10.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.11.input_layernorm.weight": "model.safetensors",
-     "model.layers.11.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.11.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.11.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.11.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.11.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.11.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.11.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.11.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.11.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.11.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.11.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.11.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.11.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.11.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.11.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.11.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.11.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.11.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.11.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.11.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.11.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.11.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.12.input_layernorm.weight": "model.safetensors",
-     "model.layers.12.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.12.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.12.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.12.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.12.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.12.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.12.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.12.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.12.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.12.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.12.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.12.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.12.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.12.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.12.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.12.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.12.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.12.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.12.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.12.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.12.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.12.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.13.input_layernorm.weight": "model.safetensors",
-     "model.layers.13.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.13.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.13.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.13.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.13.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.13.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.13.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.13.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.13.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.13.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.13.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.13.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.13.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.13.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.13.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.13.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.13.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.13.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.13.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.13.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.13.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.13.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.14.input_layernorm.weight": "model.safetensors",
-     "model.layers.14.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.14.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.14.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.14.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.14.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.14.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.14.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.14.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.14.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.14.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.14.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.14.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.14.self_attn.k_proj.weight": "model.safetensors",
-     "model.layers.14.self_attn.o_proj.biases": "model.safetensors",
-     "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
-     "model.layers.14.self_attn.o_proj.weight": "model.safetensors",
-     "model.layers.14.self_attn.q_proj.bias": "model.safetensors",
-     "model.layers.14.self_attn.q_proj.biases": "model.safetensors",
-     "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
-     "model.layers.14.self_attn.q_proj.weight": "model.safetensors",
-     "model.layers.14.self_attn.v_proj.bias": "model.safetensors",
-     "model.layers.14.self_attn.v_proj.biases": "model.safetensors",
-     "model.layers.14.self_attn.v_proj.scales": "model.safetensors",
-     "model.layers.14.self_attn.v_proj.weight": "model.safetensors",
-     "model.layers.15.input_layernorm.weight": "model.safetensors",
-     "model.layers.15.mlp.down_proj.biases": "model.safetensors",
-     "model.layers.15.mlp.down_proj.scales": "model.safetensors",
-     "model.layers.15.mlp.down_proj.weight": "model.safetensors",
-     "model.layers.15.mlp.gate_proj.biases": "model.safetensors",
-     "model.layers.15.mlp.gate_proj.scales": "model.safetensors",
-     "model.layers.15.mlp.gate_proj.weight": "model.safetensors",
-     "model.layers.15.mlp.up_proj.biases": "model.safetensors",
-     "model.layers.15.mlp.up_proj.scales": "model.safetensors",
-     "model.layers.15.mlp.up_proj.weight": "model.safetensors",
-     "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
-     "model.layers.15.self_attn.k_proj.bias": "model.safetensors",
-     "model.layers.15.self_attn.k_proj.biases": "model.safetensors",
-     "model.layers.15.self_attn.k_proj.scales": "model.safetensors",
-     "model.layers.15.self_attn.k_proj.weight": "model.safetensors",
209
- "model.layers.15.self_attn.o_proj.biases": "model.safetensors",
210
- "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
211
- "model.layers.15.self_attn.o_proj.weight": "model.safetensors",
212
- "model.layers.15.self_attn.q_proj.bias": "model.safetensors",
213
- "model.layers.15.self_attn.q_proj.biases": "model.safetensors",
214
- "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
215
- "model.layers.15.self_attn.q_proj.weight": "model.safetensors",
216
- "model.layers.15.self_attn.v_proj.bias": "model.safetensors",
217
- "model.layers.15.self_attn.v_proj.biases": "model.safetensors",
218
- "model.layers.15.self_attn.v_proj.scales": "model.safetensors",
219
- "model.layers.15.self_attn.v_proj.weight": "model.safetensors",
220
- "model.layers.16.input_layernorm.weight": "model.safetensors",
221
- "model.layers.16.mlp.down_proj.biases": "model.safetensors",
222
- "model.layers.16.mlp.down_proj.scales": "model.safetensors",
223
- "model.layers.16.mlp.down_proj.weight": "model.safetensors",
224
- "model.layers.16.mlp.gate_proj.biases": "model.safetensors",
225
- "model.layers.16.mlp.gate_proj.scales": "model.safetensors",
226
- "model.layers.16.mlp.gate_proj.weight": "model.safetensors",
227
- "model.layers.16.mlp.up_proj.biases": "model.safetensors",
228
- "model.layers.16.mlp.up_proj.scales": "model.safetensors",
229
- "model.layers.16.mlp.up_proj.weight": "model.safetensors",
230
- "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
231
- "model.layers.16.self_attn.k_proj.bias": "model.safetensors",
232
- "model.layers.16.self_attn.k_proj.biases": "model.safetensors",
233
- "model.layers.16.self_attn.k_proj.scales": "model.safetensors",
234
- "model.layers.16.self_attn.k_proj.weight": "model.safetensors",
235
- "model.layers.16.self_attn.o_proj.biases": "model.safetensors",
236
- "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
237
- "model.layers.16.self_attn.o_proj.weight": "model.safetensors",
238
- "model.layers.16.self_attn.q_proj.bias": "model.safetensors",
239
- "model.layers.16.self_attn.q_proj.biases": "model.safetensors",
240
- "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
241
- "model.layers.16.self_attn.q_proj.weight": "model.safetensors",
242
- "model.layers.16.self_attn.v_proj.bias": "model.safetensors",
243
- "model.layers.16.self_attn.v_proj.biases": "model.safetensors",
244
- "model.layers.16.self_attn.v_proj.scales": "model.safetensors",
245
- "model.layers.16.self_attn.v_proj.weight": "model.safetensors",
246
- "model.layers.17.input_layernorm.weight": "model.safetensors",
247
- "model.layers.17.mlp.down_proj.biases": "model.safetensors",
248
- "model.layers.17.mlp.down_proj.scales": "model.safetensors",
249
- "model.layers.17.mlp.down_proj.weight": "model.safetensors",
250
- "model.layers.17.mlp.gate_proj.biases": "model.safetensors",
251
- "model.layers.17.mlp.gate_proj.scales": "model.safetensors",
252
- "model.layers.17.mlp.gate_proj.weight": "model.safetensors",
253
- "model.layers.17.mlp.up_proj.biases": "model.safetensors",
254
- "model.layers.17.mlp.up_proj.scales": "model.safetensors",
255
- "model.layers.17.mlp.up_proj.weight": "model.safetensors",
256
- "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
257
- "model.layers.17.self_attn.k_proj.bias": "model.safetensors",
258
- "model.layers.17.self_attn.k_proj.biases": "model.safetensors",
259
- "model.layers.17.self_attn.k_proj.scales": "model.safetensors",
260
- "model.layers.17.self_attn.k_proj.weight": "model.safetensors",
261
- "model.layers.17.self_attn.o_proj.biases": "model.safetensors",
262
- "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
263
- "model.layers.17.self_attn.o_proj.weight": "model.safetensors",
264
- "model.layers.17.self_attn.q_proj.bias": "model.safetensors",
265
- "model.layers.17.self_attn.q_proj.biases": "model.safetensors",
266
- "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
267
- "model.layers.17.self_attn.q_proj.weight": "model.safetensors",
268
- "model.layers.17.self_attn.v_proj.bias": "model.safetensors",
269
- "model.layers.17.self_attn.v_proj.biases": "model.safetensors",
270
- "model.layers.17.self_attn.v_proj.scales": "model.safetensors",
271
- "model.layers.17.self_attn.v_proj.weight": "model.safetensors",
272
- "model.layers.18.input_layernorm.weight": "model.safetensors",
273
- "model.layers.18.mlp.down_proj.biases": "model.safetensors",
274
- "model.layers.18.mlp.down_proj.scales": "model.safetensors",
275
- "model.layers.18.mlp.down_proj.weight": "model.safetensors",
276
- "model.layers.18.mlp.gate_proj.biases": "model.safetensors",
277
- "model.layers.18.mlp.gate_proj.scales": "model.safetensors",
278
- "model.layers.18.mlp.gate_proj.weight": "model.safetensors",
279
- "model.layers.18.mlp.up_proj.biases": "model.safetensors",
280
- "model.layers.18.mlp.up_proj.scales": "model.safetensors",
281
- "model.layers.18.mlp.up_proj.weight": "model.safetensors",
282
- "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
283
- "model.layers.18.self_attn.k_proj.bias": "model.safetensors",
284
- "model.layers.18.self_attn.k_proj.biases": "model.safetensors",
285
- "model.layers.18.self_attn.k_proj.scales": "model.safetensors",
286
- "model.layers.18.self_attn.k_proj.weight": "model.safetensors",
287
- "model.layers.18.self_attn.o_proj.biases": "model.safetensors",
288
- "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
289
- "model.layers.18.self_attn.o_proj.weight": "model.safetensors",
290
- "model.layers.18.self_attn.q_proj.bias": "model.safetensors",
291
- "model.layers.18.self_attn.q_proj.biases": "model.safetensors",
292
- "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
293
- "model.layers.18.self_attn.q_proj.weight": "model.safetensors",
294
- "model.layers.18.self_attn.v_proj.bias": "model.safetensors",
295
- "model.layers.18.self_attn.v_proj.biases": "model.safetensors",
296
- "model.layers.18.self_attn.v_proj.scales": "model.safetensors",
297
- "model.layers.18.self_attn.v_proj.weight": "model.safetensors",
298
- "model.layers.19.input_layernorm.weight": "model.safetensors",
299
- "model.layers.19.mlp.down_proj.biases": "model.safetensors",
300
- "model.layers.19.mlp.down_proj.scales": "model.safetensors",
301
- "model.layers.19.mlp.down_proj.weight": "model.safetensors",
302
- "model.layers.19.mlp.gate_proj.biases": "model.safetensors",
303
- "model.layers.19.mlp.gate_proj.scales": "model.safetensors",
304
- "model.layers.19.mlp.gate_proj.weight": "model.safetensors",
305
- "model.layers.19.mlp.up_proj.biases": "model.safetensors",
306
- "model.layers.19.mlp.up_proj.scales": "model.safetensors",
307
- "model.layers.19.mlp.up_proj.weight": "model.safetensors",
308
- "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
309
- "model.layers.19.self_attn.k_proj.bias": "model.safetensors",
310
- "model.layers.19.self_attn.k_proj.biases": "model.safetensors",
311
- "model.layers.19.self_attn.k_proj.scales": "model.safetensors",
312
- "model.layers.19.self_attn.k_proj.weight": "model.safetensors",
313
- "model.layers.19.self_attn.o_proj.biases": "model.safetensors",
314
- "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
315
- "model.layers.19.self_attn.o_proj.weight": "model.safetensors",
316
- "model.layers.19.self_attn.q_proj.bias": "model.safetensors",
317
- "model.layers.19.self_attn.q_proj.biases": "model.safetensors",
318
- "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
319
- "model.layers.19.self_attn.q_proj.weight": "model.safetensors",
320
- "model.layers.19.self_attn.v_proj.bias": "model.safetensors",
321
- "model.layers.19.self_attn.v_proj.biases": "model.safetensors",
322
- "model.layers.19.self_attn.v_proj.scales": "model.safetensors",
323
- "model.layers.19.self_attn.v_proj.weight": "model.safetensors",
324
- "model.layers.2.input_layernorm.weight": "model.safetensors",
325
- "model.layers.2.mlp.down_proj.biases": "model.safetensors",
326
- "model.layers.2.mlp.down_proj.scales": "model.safetensors",
327
- "model.layers.2.mlp.down_proj.weight": "model.safetensors",
328
- "model.layers.2.mlp.gate_proj.biases": "model.safetensors",
329
- "model.layers.2.mlp.gate_proj.scales": "model.safetensors",
330
- "model.layers.2.mlp.gate_proj.weight": "model.safetensors",
331
- "model.layers.2.mlp.up_proj.biases": "model.safetensors",
332
- "model.layers.2.mlp.up_proj.scales": "model.safetensors",
333
- "model.layers.2.mlp.up_proj.weight": "model.safetensors",
334
- "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
335
- "model.layers.2.self_attn.k_proj.bias": "model.safetensors",
336
- "model.layers.2.self_attn.k_proj.biases": "model.safetensors",
337
- "model.layers.2.self_attn.k_proj.scales": "model.safetensors",
338
- "model.layers.2.self_attn.k_proj.weight": "model.safetensors",
339
- "model.layers.2.self_attn.o_proj.biases": "model.safetensors",
340
- "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
341
- "model.layers.2.self_attn.o_proj.weight": "model.safetensors",
342
- "model.layers.2.self_attn.q_proj.bias": "model.safetensors",
343
- "model.layers.2.self_attn.q_proj.biases": "model.safetensors",
344
- "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
345
- "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
346
- "model.layers.2.self_attn.v_proj.bias": "model.safetensors",
347
- "model.layers.2.self_attn.v_proj.biases": "model.safetensors",
348
- "model.layers.2.self_attn.v_proj.scales": "model.safetensors",
349
- "model.layers.2.self_attn.v_proj.weight": "model.safetensors",
350
- "model.layers.20.input_layernorm.weight": "model.safetensors",
351
- "model.layers.20.mlp.down_proj.biases": "model.safetensors",
352
- "model.layers.20.mlp.down_proj.scales": "model.safetensors",
353
- "model.layers.20.mlp.down_proj.weight": "model.safetensors",
354
- "model.layers.20.mlp.gate_proj.biases": "model.safetensors",
355
- "model.layers.20.mlp.gate_proj.scales": "model.safetensors",
356
- "model.layers.20.mlp.gate_proj.weight": "model.safetensors",
357
- "model.layers.20.mlp.up_proj.biases": "model.safetensors",
358
- "model.layers.20.mlp.up_proj.scales": "model.safetensors",
359
- "model.layers.20.mlp.up_proj.weight": "model.safetensors",
360
- "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
361
- "model.layers.20.self_attn.k_proj.bias": "model.safetensors",
362
- "model.layers.20.self_attn.k_proj.biases": "model.safetensors",
363
- "model.layers.20.self_attn.k_proj.scales": "model.safetensors",
364
- "model.layers.20.self_attn.k_proj.weight": "model.safetensors",
365
- "model.layers.20.self_attn.o_proj.biases": "model.safetensors",
366
- "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
367
- "model.layers.20.self_attn.o_proj.weight": "model.safetensors",
368
- "model.layers.20.self_attn.q_proj.bias": "model.safetensors",
369
- "model.layers.20.self_attn.q_proj.biases": "model.safetensors",
370
- "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
371
- "model.layers.20.self_attn.q_proj.weight": "model.safetensors",
372
- "model.layers.20.self_attn.v_proj.bias": "model.safetensors",
373
- "model.layers.20.self_attn.v_proj.biases": "model.safetensors",
374
- "model.layers.20.self_attn.v_proj.scales": "model.safetensors",
375
- "model.layers.20.self_attn.v_proj.weight": "model.safetensors",
376
- "model.layers.21.input_layernorm.weight": "model.safetensors",
377
- "model.layers.21.mlp.down_proj.biases": "model.safetensors",
378
- "model.layers.21.mlp.down_proj.scales": "model.safetensors",
379
- "model.layers.21.mlp.down_proj.weight": "model.safetensors",
380
- "model.layers.21.mlp.gate_proj.biases": "model.safetensors",
381
- "model.layers.21.mlp.gate_proj.scales": "model.safetensors",
382
- "model.layers.21.mlp.gate_proj.weight": "model.safetensors",
383
- "model.layers.21.mlp.up_proj.biases": "model.safetensors",
384
- "model.layers.21.mlp.up_proj.scales": "model.safetensors",
385
- "model.layers.21.mlp.up_proj.weight": "model.safetensors",
386
- "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
387
- "model.layers.21.self_attn.k_proj.bias": "model.safetensors",
388
- "model.layers.21.self_attn.k_proj.biases": "model.safetensors",
389
- "model.layers.21.self_attn.k_proj.scales": "model.safetensors",
390
- "model.layers.21.self_attn.k_proj.weight": "model.safetensors",
391
- "model.layers.21.self_attn.o_proj.biases": "model.safetensors",
392
- "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
393
- "model.layers.21.self_attn.o_proj.weight": "model.safetensors",
394
- "model.layers.21.self_attn.q_proj.bias": "model.safetensors",
395
- "model.layers.21.self_attn.q_proj.biases": "model.safetensors",
396
- "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
397
- "model.layers.21.self_attn.q_proj.weight": "model.safetensors",
398
- "model.layers.21.self_attn.v_proj.bias": "model.safetensors",
399
- "model.layers.21.self_attn.v_proj.biases": "model.safetensors",
400
- "model.layers.21.self_attn.v_proj.scales": "model.safetensors",
401
- "model.layers.21.self_attn.v_proj.weight": "model.safetensors",
402
- "model.layers.22.input_layernorm.weight": "model.safetensors",
403
- "model.layers.22.mlp.down_proj.biases": "model.safetensors",
404
- "model.layers.22.mlp.down_proj.scales": "model.safetensors",
405
- "model.layers.22.mlp.down_proj.weight": "model.safetensors",
406
- "model.layers.22.mlp.gate_proj.biases": "model.safetensors",
407
- "model.layers.22.mlp.gate_proj.scales": "model.safetensors",
408
- "model.layers.22.mlp.gate_proj.weight": "model.safetensors",
409
- "model.layers.22.mlp.up_proj.biases": "model.safetensors",
410
- "model.layers.22.mlp.up_proj.scales": "model.safetensors",
411
- "model.layers.22.mlp.up_proj.weight": "model.safetensors",
412
- "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
413
- "model.layers.22.self_attn.k_proj.bias": "model.safetensors",
414
- "model.layers.22.self_attn.k_proj.biases": "model.safetensors",
415
- "model.layers.22.self_attn.k_proj.scales": "model.safetensors",
416
- "model.layers.22.self_attn.k_proj.weight": "model.safetensors",
417
- "model.layers.22.self_attn.o_proj.biases": "model.safetensors",
418
- "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
419
- "model.layers.22.self_attn.o_proj.weight": "model.safetensors",
420
- "model.layers.22.self_attn.q_proj.bias": "model.safetensors",
421
- "model.layers.22.self_attn.q_proj.biases": "model.safetensors",
422
- "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
423
- "model.layers.22.self_attn.q_proj.weight": "model.safetensors",
424
- "model.layers.22.self_attn.v_proj.bias": "model.safetensors",
425
- "model.layers.22.self_attn.v_proj.biases": "model.safetensors",
426
- "model.layers.22.self_attn.v_proj.scales": "model.safetensors",
427
- "model.layers.22.self_attn.v_proj.weight": "model.safetensors",
428
- "model.layers.23.input_layernorm.weight": "model.safetensors",
429
- "model.layers.23.mlp.down_proj.biases": "model.safetensors",
430
- "model.layers.23.mlp.down_proj.scales": "model.safetensors",
431
- "model.layers.23.mlp.down_proj.weight": "model.safetensors",
432
- "model.layers.23.mlp.gate_proj.biases": "model.safetensors",
433
- "model.layers.23.mlp.gate_proj.scales": "model.safetensors",
434
- "model.layers.23.mlp.gate_proj.weight": "model.safetensors",
435
- "model.layers.23.mlp.up_proj.biases": "model.safetensors",
436
- "model.layers.23.mlp.up_proj.scales": "model.safetensors",
437
- "model.layers.23.mlp.up_proj.weight": "model.safetensors",
438
- "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
439
- "model.layers.23.self_attn.k_proj.bias": "model.safetensors",
440
- "model.layers.23.self_attn.k_proj.biases": "model.safetensors",
441
- "model.layers.23.self_attn.k_proj.scales": "model.safetensors",
442
- "model.layers.23.self_attn.k_proj.weight": "model.safetensors",
443
- "model.layers.23.self_attn.o_proj.biases": "model.safetensors",
444
- "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
445
- "model.layers.23.self_attn.o_proj.weight": "model.safetensors",
446
- "model.layers.23.self_attn.q_proj.bias": "model.safetensors",
447
- "model.layers.23.self_attn.q_proj.biases": "model.safetensors",
448
- "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
449
- "model.layers.23.self_attn.q_proj.weight": "model.safetensors",
450
- "model.layers.23.self_attn.v_proj.bias": "model.safetensors",
451
- "model.layers.23.self_attn.v_proj.biases": "model.safetensors",
452
- "model.layers.23.self_attn.v_proj.scales": "model.safetensors",
453
- "model.layers.23.self_attn.v_proj.weight": "model.safetensors",
454
- "model.layers.24.input_layernorm.weight": "model.safetensors",
455
- "model.layers.24.mlp.down_proj.biases": "model.safetensors",
456
- "model.layers.24.mlp.down_proj.scales": "model.safetensors",
457
- "model.layers.24.mlp.down_proj.weight": "model.safetensors",
458
- "model.layers.24.mlp.gate_proj.biases": "model.safetensors",
459
- "model.layers.24.mlp.gate_proj.scales": "model.safetensors",
460
- "model.layers.24.mlp.gate_proj.weight": "model.safetensors",
461
- "model.layers.24.mlp.up_proj.biases": "model.safetensors",
462
- "model.layers.24.mlp.up_proj.scales": "model.safetensors",
463
- "model.layers.24.mlp.up_proj.weight": "model.safetensors",
464
- "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
465
- "model.layers.24.self_attn.k_proj.bias": "model.safetensors",
466
- "model.layers.24.self_attn.k_proj.biases": "model.safetensors",
467
- "model.layers.24.self_attn.k_proj.scales": "model.safetensors",
468
- "model.layers.24.self_attn.k_proj.weight": "model.safetensors",
469
- "model.layers.24.self_attn.o_proj.biases": "model.safetensors",
470
- "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
471
- "model.layers.24.self_attn.o_proj.weight": "model.safetensors",
472
- "model.layers.24.self_attn.q_proj.bias": "model.safetensors",
473
- "model.layers.24.self_attn.q_proj.biases": "model.safetensors",
474
- "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
475
- "model.layers.24.self_attn.q_proj.weight": "model.safetensors",
476
- "model.layers.24.self_attn.v_proj.bias": "model.safetensors",
477
- "model.layers.24.self_attn.v_proj.biases": "model.safetensors",
478
- "model.layers.24.self_attn.v_proj.scales": "model.safetensors",
479
- "model.layers.24.self_attn.v_proj.weight": "model.safetensors",
480
- "model.layers.25.input_layernorm.weight": "model.safetensors",
481
- "model.layers.25.mlp.down_proj.biases": "model.safetensors",
482
- "model.layers.25.mlp.down_proj.scales": "model.safetensors",
483
- "model.layers.25.mlp.down_proj.weight": "model.safetensors",
484
- "model.layers.25.mlp.gate_proj.biases": "model.safetensors",
485
- "model.layers.25.mlp.gate_proj.scales": "model.safetensors",
486
- "model.layers.25.mlp.gate_proj.weight": "model.safetensors",
487
- "model.layers.25.mlp.up_proj.biases": "model.safetensors",
488
- "model.layers.25.mlp.up_proj.scales": "model.safetensors",
489
- "model.layers.25.mlp.up_proj.weight": "model.safetensors",
490
- "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
491
- "model.layers.25.self_attn.k_proj.bias": "model.safetensors",
492
- "model.layers.25.self_attn.k_proj.biases": "model.safetensors",
493
- "model.layers.25.self_attn.k_proj.scales": "model.safetensors",
494
- "model.layers.25.self_attn.k_proj.weight": "model.safetensors",
495
- "model.layers.25.self_attn.o_proj.biases": "model.safetensors",
496
- "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
497
- "model.layers.25.self_attn.o_proj.weight": "model.safetensors",
498
- "model.layers.25.self_attn.q_proj.bias": "model.safetensors",
499
- "model.layers.25.self_attn.q_proj.biases": "model.safetensors",
500
- "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
501
- "model.layers.25.self_attn.q_proj.weight": "model.safetensors",
502
- "model.layers.25.self_attn.v_proj.bias": "model.safetensors",
503
- "model.layers.25.self_attn.v_proj.biases": "model.safetensors",
504
- "model.layers.25.self_attn.v_proj.scales": "model.safetensors",
505
- "model.layers.25.self_attn.v_proj.weight": "model.safetensors",
506
- "model.layers.26.input_layernorm.weight": "model.safetensors",
507
- "model.layers.26.mlp.down_proj.biases": "model.safetensors",
508
- "model.layers.26.mlp.down_proj.scales": "model.safetensors",
509
- "model.layers.26.mlp.down_proj.weight": "model.safetensors",
510
- "model.layers.26.mlp.gate_proj.biases": "model.safetensors",
511
- "model.layers.26.mlp.gate_proj.scales": "model.safetensors",
512
- "model.layers.26.mlp.gate_proj.weight": "model.safetensors",
513
- "model.layers.26.mlp.up_proj.biases": "model.safetensors",
514
- "model.layers.26.mlp.up_proj.scales": "model.safetensors",
515
- "model.layers.26.mlp.up_proj.weight": "model.safetensors",
516
- "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
517
- "model.layers.26.self_attn.k_proj.bias": "model.safetensors",
518
- "model.layers.26.self_attn.k_proj.biases": "model.safetensors",
519
- "model.layers.26.self_attn.k_proj.scales": "model.safetensors",
520
- "model.layers.26.self_attn.k_proj.weight": "model.safetensors",
521
- "model.layers.26.self_attn.o_proj.biases": "model.safetensors",
522
- "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
523
- "model.layers.26.self_attn.o_proj.weight": "model.safetensors",
524
- "model.layers.26.self_attn.q_proj.bias": "model.safetensors",
525
- "model.layers.26.self_attn.q_proj.biases": "model.safetensors",
526
- "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
527
- "model.layers.26.self_attn.q_proj.weight": "model.safetensors",
528
- "model.layers.26.self_attn.v_proj.bias": "model.safetensors",
529
- "model.layers.26.self_attn.v_proj.biases": "model.safetensors",
530
- "model.layers.26.self_attn.v_proj.scales": "model.safetensors",
531
- "model.layers.26.self_attn.v_proj.weight": "model.safetensors",
532
- "model.layers.27.input_layernorm.weight": "model.safetensors",
533
- "model.layers.27.mlp.down_proj.biases": "model.safetensors",
534
- "model.layers.27.mlp.down_proj.scales": "model.safetensors",
535
- "model.layers.27.mlp.down_proj.weight": "model.safetensors",
536
- "model.layers.27.mlp.gate_proj.biases": "model.safetensors",
537
- "model.layers.27.mlp.gate_proj.scales": "model.safetensors",
538
- "model.layers.27.mlp.gate_proj.weight": "model.safetensors",
539
- "model.layers.27.mlp.up_proj.biases": "model.safetensors",
540
- "model.layers.27.mlp.up_proj.scales": "model.safetensors",
541
- "model.layers.27.mlp.up_proj.weight": "model.safetensors",
542
- "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
543
- "model.layers.27.self_attn.k_proj.bias": "model.safetensors",
544
- "model.layers.27.self_attn.k_proj.biases": "model.safetensors",
545
- "model.layers.27.self_attn.k_proj.scales": "model.safetensors",
546
- "model.layers.27.self_attn.k_proj.weight": "model.safetensors",
547
- "model.layers.27.self_attn.o_proj.biases": "model.safetensors",
548
- "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
549
- "model.layers.27.self_attn.o_proj.weight": "model.safetensors",
550
- "model.layers.27.self_attn.q_proj.bias": "model.safetensors",
551
- "model.layers.27.self_attn.q_proj.biases": "model.safetensors",
552
- "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
553
- "model.layers.27.self_attn.q_proj.weight": "model.safetensors",
554
- "model.layers.27.self_attn.v_proj.bias": "model.safetensors",
555
- "model.layers.27.self_attn.v_proj.biases": "model.safetensors",
556
- "model.layers.27.self_attn.v_proj.scales": "model.safetensors",
557
- "model.layers.27.self_attn.v_proj.weight": "model.safetensors",
558
- "model.layers.3.input_layernorm.weight": "model.safetensors",
559
- "model.layers.3.mlp.down_proj.biases": "model.safetensors",
560
- "model.layers.3.mlp.down_proj.scales": "model.safetensors",
561
- "model.layers.3.mlp.down_proj.weight": "model.safetensors",
562
- "model.layers.3.mlp.gate_proj.biases": "model.safetensors",
563
- "model.layers.3.mlp.gate_proj.scales": "model.safetensors",
564
- "model.layers.3.mlp.gate_proj.weight": "model.safetensors",
565
- "model.layers.3.mlp.up_proj.biases": "model.safetensors",
566
- "model.layers.3.mlp.up_proj.scales": "model.safetensors",
567
- "model.layers.3.mlp.up_proj.weight": "model.safetensors",
568
- "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
569
- "model.layers.3.self_attn.k_proj.bias": "model.safetensors",
570
- "model.layers.3.self_attn.k_proj.biases": "model.safetensors",
571
- "model.layers.3.self_attn.k_proj.scales": "model.safetensors",
572
- "model.layers.3.self_attn.k_proj.weight": "model.safetensors",
573
- "model.layers.3.self_attn.o_proj.biases": "model.safetensors",
574
- "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
575
- "model.layers.3.self_attn.o_proj.weight": "model.safetensors",
576
- "model.layers.3.self_attn.q_proj.bias": "model.safetensors",
577
- "model.layers.3.self_attn.q_proj.biases": "model.safetensors",
578
- "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
579
- "model.layers.3.self_attn.q_proj.weight": "model.safetensors",
580
- "model.layers.3.self_attn.v_proj.bias": "model.safetensors",
581
- "model.layers.3.self_attn.v_proj.biases": "model.safetensors",
582
- "model.layers.3.self_attn.v_proj.scales": "model.safetensors",
583
- "model.layers.3.self_attn.v_proj.weight": "model.safetensors",
584
- "model.layers.4.input_layernorm.weight": "model.safetensors",
585
- "model.layers.4.mlp.down_proj.biases": "model.safetensors",
586
- "model.layers.4.mlp.down_proj.scales": "model.safetensors",
587
- "model.layers.4.mlp.down_proj.weight": "model.safetensors",
588
- "model.layers.4.mlp.gate_proj.biases": "model.safetensors",
589
- "model.layers.4.mlp.gate_proj.scales": "model.safetensors",
590
- "model.layers.4.mlp.gate_proj.weight": "model.safetensors",
591
- "model.layers.4.mlp.up_proj.biases": "model.safetensors",
592
- "model.layers.4.mlp.up_proj.scales": "model.safetensors",
593
- "model.layers.4.mlp.up_proj.weight": "model.safetensors",
594
- "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
595
- "model.layers.4.self_attn.k_proj.bias": "model.safetensors",
596
- "model.layers.4.self_attn.k_proj.biases": "model.safetensors",
597
- "model.layers.4.self_attn.k_proj.scales": "model.safetensors",
598
- "model.layers.4.self_attn.k_proj.weight": "model.safetensors",
599
- "model.layers.4.self_attn.o_proj.biases": "model.safetensors",
600
- "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
601
- "model.layers.4.self_attn.o_proj.weight": "model.safetensors",
602
- "model.layers.4.self_attn.q_proj.bias": "model.safetensors",
603
- "model.layers.4.self_attn.q_proj.biases": "model.safetensors",
604
- "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
605
- "model.layers.4.self_attn.q_proj.weight": "model.safetensors",
606
- "model.layers.4.self_attn.v_proj.bias": "model.safetensors",
607
- "model.layers.4.self_attn.v_proj.biases": "model.safetensors",
608
- "model.layers.4.self_attn.v_proj.scales": "model.safetensors",
609
- "model.layers.4.self_attn.v_proj.weight": "model.safetensors",
610
- "model.layers.5.input_layernorm.weight": "model.safetensors",
611
- "model.layers.5.mlp.down_proj.biases": "model.safetensors",
612
- "model.layers.5.mlp.down_proj.scales": "model.safetensors",
613
- "model.layers.5.mlp.down_proj.weight": "model.safetensors",
614
- "model.layers.5.mlp.gate_proj.biases": "model.safetensors",
615
- "model.layers.5.mlp.gate_proj.scales": "model.safetensors",
616
- "model.layers.5.mlp.gate_proj.weight": "model.safetensors",
617
- "model.layers.5.mlp.up_proj.biases": "model.safetensors",
618
- "model.layers.5.mlp.up_proj.scales": "model.safetensors",
619
- "model.layers.5.mlp.up_proj.weight": "model.safetensors",
620
- "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
621
- "model.layers.5.self_attn.k_proj.bias": "model.safetensors",
622
- "model.layers.5.self_attn.k_proj.biases": "model.safetensors",
623
- "model.layers.5.self_attn.k_proj.scales": "model.safetensors",
624
- "model.layers.5.self_attn.k_proj.weight": "model.safetensors",
625
- "model.layers.5.self_attn.o_proj.biases": "model.safetensors",
626
- "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
627
- "model.layers.5.self_attn.o_proj.weight": "model.safetensors",
628
- "model.layers.5.self_attn.q_proj.bias": "model.safetensors",
629
- "model.layers.5.self_attn.q_proj.biases": "model.safetensors",
630
- "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
631
- "model.layers.5.self_attn.q_proj.weight": "model.safetensors",
632
- "model.layers.5.self_attn.v_proj.bias": "model.safetensors",
633
- "model.layers.5.self_attn.v_proj.biases": "model.safetensors",
634
- "model.layers.5.self_attn.v_proj.scales": "model.safetensors",
635
- "model.layers.5.self_attn.v_proj.weight": "model.safetensors",
636
- "model.layers.6.input_layernorm.weight": "model.safetensors",
637
- "model.layers.6.mlp.down_proj.biases": "model.safetensors",
638
- "model.layers.6.mlp.down_proj.scales": "model.safetensors",
639
- "model.layers.6.mlp.down_proj.weight": "model.safetensors",
640
- "model.layers.6.mlp.gate_proj.biases": "model.safetensors",
641
- "model.layers.6.mlp.gate_proj.scales": "model.safetensors",
642
- "model.layers.6.mlp.gate_proj.weight": "model.safetensors",
643
- "model.layers.6.mlp.up_proj.biases": "model.safetensors",
644
- "model.layers.6.mlp.up_proj.scales": "model.safetensors",
645
- "model.layers.6.mlp.up_proj.weight": "model.safetensors",
646
- "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
647
- "model.layers.6.self_attn.k_proj.bias": "model.safetensors",
648
- "model.layers.6.self_attn.k_proj.biases": "model.safetensors",
649
- "model.layers.6.self_attn.k_proj.scales": "model.safetensors",
650
- "model.layers.6.self_attn.k_proj.weight": "model.safetensors",
651
- "model.layers.6.self_attn.o_proj.biases": "model.safetensors",
652
- "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
653
- "model.layers.6.self_attn.o_proj.weight": "model.safetensors",
654
- "model.layers.6.self_attn.q_proj.bias": "model.safetensors",
655
- "model.layers.6.self_attn.q_proj.biases": "model.safetensors",
656
- "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
657
- "model.layers.6.self_attn.q_proj.weight": "model.safetensors",
658
- "model.layers.6.self_attn.v_proj.bias": "model.safetensors",
659
- "model.layers.6.self_attn.v_proj.biases": "model.safetensors",
660
- "model.layers.6.self_attn.v_proj.scales": "model.safetensors",
661
- "model.layers.6.self_attn.v_proj.weight": "model.safetensors",
662
- "model.layers.7.input_layernorm.weight": "model.safetensors",
663
- "model.layers.7.mlp.down_proj.biases": "model.safetensors",
664
- "model.layers.7.mlp.down_proj.scales": "model.safetensors",
665
- "model.layers.7.mlp.down_proj.weight": "model.safetensors",
666
- "model.layers.7.mlp.gate_proj.biases": "model.safetensors",
667
- "model.layers.7.mlp.gate_proj.scales": "model.safetensors",
668
- "model.layers.7.mlp.gate_proj.weight": "model.safetensors",
669
- "model.layers.7.mlp.up_proj.biases": "model.safetensors",
670
- "model.layers.7.mlp.up_proj.scales": "model.safetensors",
671
- "model.layers.7.mlp.up_proj.weight": "model.safetensors",
672
- "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
673
- "model.layers.7.self_attn.k_proj.bias": "model.safetensors",
674
- "model.layers.7.self_attn.k_proj.biases": "model.safetensors",
675
- "model.layers.7.self_attn.k_proj.scales": "model.safetensors",
676
- "model.layers.7.self_attn.k_proj.weight": "model.safetensors",
677
- "model.layers.7.self_attn.o_proj.biases": "model.safetensors",
678
- "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
679
- "model.layers.7.self_attn.o_proj.weight": "model.safetensors",
680
- "model.layers.7.self_attn.q_proj.bias": "model.safetensors",
681
- "model.layers.7.self_attn.q_proj.biases": "model.safetensors",
682
- "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
683
- "model.layers.7.self_attn.q_proj.weight": "model.safetensors",
684
- "model.layers.7.self_attn.v_proj.bias": "model.safetensors",
685
- "model.layers.7.self_attn.v_proj.biases": "model.safetensors",
686
- "model.layers.7.self_attn.v_proj.scales": "model.safetensors",
687
- "model.layers.7.self_attn.v_proj.weight": "model.safetensors",
688
- "model.layers.8.input_layernorm.weight": "model.safetensors",
689
- "model.layers.8.mlp.down_proj.biases": "model.safetensors",
690
- "model.layers.8.mlp.down_proj.scales": "model.safetensors",
691
- "model.layers.8.mlp.down_proj.weight": "model.safetensors",
692
- "model.layers.8.mlp.gate_proj.biases": "model.safetensors",
693
- "model.layers.8.mlp.gate_proj.scales": "model.safetensors",
694
- "model.layers.8.mlp.gate_proj.weight": "model.safetensors",
695
- "model.layers.8.mlp.up_proj.biases": "model.safetensors",
696
- "model.layers.8.mlp.up_proj.scales": "model.safetensors",
697
- "model.layers.8.mlp.up_proj.weight": "model.safetensors",
698
- "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
699
- "model.layers.8.self_attn.k_proj.bias": "model.safetensors",
700
- "model.layers.8.self_attn.k_proj.biases": "model.safetensors",
701
- "model.layers.8.self_attn.k_proj.scales": "model.safetensors",
702
- "model.layers.8.self_attn.k_proj.weight": "model.safetensors",
703
- "model.layers.8.self_attn.o_proj.biases": "model.safetensors",
704
- "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
705
- "model.layers.8.self_attn.o_proj.weight": "model.safetensors",
706
- "model.layers.8.self_attn.q_proj.bias": "model.safetensors",
707
- "model.layers.8.self_attn.q_proj.biases": "model.safetensors",
708
- "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
709
- "model.layers.8.self_attn.q_proj.weight": "model.safetensors",
710
- "model.layers.8.self_attn.v_proj.bias": "model.safetensors",
711
- "model.layers.8.self_attn.v_proj.biases": "model.safetensors",
712
- "model.layers.8.self_attn.v_proj.scales": "model.safetensors",
713
- "model.layers.8.self_attn.v_proj.weight": "model.safetensors",
714
- "model.layers.9.input_layernorm.weight": "model.safetensors",
715
- "model.layers.9.mlp.down_proj.biases": "model.safetensors",
716
- "model.layers.9.mlp.down_proj.scales": "model.safetensors",
717
- "model.layers.9.mlp.down_proj.weight": "model.safetensors",
718
- "model.layers.9.mlp.gate_proj.biases": "model.safetensors",
719
- "model.layers.9.mlp.gate_proj.scales": "model.safetensors",
720
- "model.layers.9.mlp.gate_proj.weight": "model.safetensors",
721
- "model.layers.9.mlp.up_proj.biases": "model.safetensors",
722
- "model.layers.9.mlp.up_proj.scales": "model.safetensors",
723
- "model.layers.9.mlp.up_proj.weight": "model.safetensors",
724
- "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
725
- "model.layers.9.self_attn.k_proj.bias": "model.safetensors",
726
- "model.layers.9.self_attn.k_proj.biases": "model.safetensors",
727
- "model.layers.9.self_attn.k_proj.scales": "model.safetensors",
728
- "model.layers.9.self_attn.k_proj.weight": "model.safetensors",
729
- "model.layers.9.self_attn.o_proj.biases": "model.safetensors",
730
- "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
731
- "model.layers.9.self_attn.o_proj.weight": "model.safetensors",
732
- "model.layers.9.self_attn.q_proj.bias": "model.safetensors",
733
- "model.layers.9.self_attn.q_proj.biases": "model.safetensors",
734
- "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
735
- "model.layers.9.self_attn.q_proj.weight": "model.safetensors",
736
- "model.layers.9.self_attn.v_proj.bias": "model.safetensors",
737
- "model.layers.9.self_attn.v_proj.biases": "model.safetensors",
738
- "model.layers.9.self_attn.v_proj.scales": "model.safetensors",
739
- "model.layers.9.self_attn.v_proj.weight": "model.safetensors",
740
- "model.norm.weight": "model.safetensors"
741
- }
742
- }
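The map above is the tail of a deleted model.safetensors.index.json. Two details are worth noting: every entry resolves to the single shard model.safetensors, and each quantized projection appears as a weight/scales/biases triple, which is how MLX persists affine-quantized matrices (packed integer weights plus per-group scale and bias). The singular bias entries on q_proj/k_proj/v_proj are Qwen2's ordinary attention biases, distinct from the quantizer's plural biases. A minimal sketch of reading such an index, assuming a local copy of the file (any paths here are hypothetical):

    import json

    # Load the shard index that safetensors-aware loaders consult.
    with open("model.safetensors.index.json") as f:
        index = json.load(f)

    weight_map = index["weight_map"]  # tensor name -> shard filename
    shards = sorted(set(weight_map.values()))
    print(len(weight_map), "tensors in", shards)  # here: one model.safetensors

    # Each quantized projection contributes three tensors:
    for suffix in ("weight", "scales", "biases"):
        name = f"model.layers.11.self_attn.o_proj.{suffix}"
        print(name, "->", weight_map[name])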
DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
- {
-   "bos_token": {
-     "content": "<|begin▁of▁sentence|>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "eos_token": {
-     "content": "<|end▁of▁sentence|>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "pad_token": {
-     "content": "<|end▁of▁sentence|>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   }
- }
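This special_tokens_map.json is the per-variant copy of DeepSeek's special-token configuration; note that the pad token is deliberately the same string as the eos token, a common choice for decoder-only models pretrained without padding. A quick sketch of how these fields surface through the tokenizer API (the local path is assumed, and transformers is used purely for illustration):

    from transformers import AutoTokenizer

    # Hypothetical path: point this at any of the quant folders in this repo.
    tok = AutoTokenizer.from_pretrained("DeepSeek-R1-Distill-Qwen-7B-4,6_mixed")
    print(tok.bos_token)                   # <|begin▁of▁sentence|>
    print(tok.eos_token == tok.pad_token)  # True: eos doubles as pad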
DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/tokenizer.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
- size 11422778
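What was deleted here is not the tokenizer itself but its Git LFS pointer: the three lines record the pointer-spec version, the SHA-256 of the real tokenizer.json, and its size in bytes (about 11 MB). A sketch of verifying a fetched file against such a pointer:

    import hashlib

    def matches_lfs_pointer(path: str, oid: str, size: int) -> bool:
        # Compare a downloaded file against the oid/size from an LFS pointer.
        data = open(path, "rb").read()
        return len(data) == size and hashlib.sha256(data).hexdigest() == oid

    print(matches_lfs_pointer(
        "tokenizer.json",  # assumed to be the pulled file
        "e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893",
        11422778,
    ))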
DeepSeek-R1-Distill-Qwen-7B-4,6_mixed/tokenizer_config.json DELETED
@@ -1,195 +0,0 @@
- {
-   "add_bos_token": true,
-   "add_eos_token": false,
-   "add_prefix_space": null,
-   "added_tokens_decoder": {
-     "151643": {
-       "content": "<|end▁of▁sentence|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151644": {
-       "content": "<|User|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151645": {
-       "content": "<|Assistant|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151646": {
-       "content": "<|begin▁of▁sentence|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151647": {
-       "content": "<|EOT|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151648": {
-       "content": "<think>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151649": {
-       "content": "</think>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151650": {
-       "content": "<|quad_start|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151651": {
-       "content": "<|quad_end|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151652": {
-       "content": "<|vision_start|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151653": {
-       "content": "<|vision_end|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151654": {
-       "content": "<|vision_pad|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151655": {
-       "content": "<|image_pad|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151656": {
-       "content": "<|video_pad|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151657": {
-       "content": "<tool_call>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151658": {
-       "content": "</tool_call>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151659": {
-       "content": "<|fim_prefix|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151660": {
-       "content": "<|fim_middle|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151661": {
-       "content": "<|fim_suffix|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151662": {
-       "content": "<|fim_pad|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151663": {
-       "content": "<|repo_name|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     },
-     "151664": {
-       "content": "<|file_sep|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": false
-     }
-   },
-   "bos_token": "<|begin▁of▁sentence|>",
-   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
-   "clean_up_tokenization_spaces": false,
-   "eos_token": "<|end▁of▁sentence|>",
-   "extra_special_tokens": {},
-   "legacy": true,
-   "model_max_length": 16384,
-   "pad_token": "<|end▁of▁sentence|>",
-   "sp_model_kwargs": {},
-   "tokenizer_class": "LlamaTokenizerFast",
-   "unk_token": null,
-   "use_default_system_prompt": false
- }
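The long chat_template value above is a single-line Jinja program: it hoists any system message to the front of the prompt, wraps user turns in <|User|>, renders tool calls and tool outputs with the <|tool▁...|> markers, strips everything up to </think> from prior assistant turns, and, when add_generation_prompt is set, ends the prompt with <|Assistant|><think>\n to start the model in reasoning mode. A sketch of rendering it (local path assumed):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("DeepSeek-R1-Distill-Qwen-7B-4,6_mixed")
    prompt = tok.apply_chat_template(
        [{"role": "user", "content": "What is 7 * 6?"}],
        tokenize=False,
        add_generation_prompt=True,
    )
    print(prompt)  # ends with "<|Assistant|><think>\n" per the template above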
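The config.json that follows (for the 4,8_mixed variant) carries the interesting part of these quants: an MLX "quantization" section with a global group_size of 64, a null top-level bits (per-module settings apply instead), rope and layernorm modules left unquantized (false), and v_proj/down_proj promoted from 4 to 8 bits in some layers (0, 1, 2, 5, 8, and 11 in the visible range). A sketch of summarizing such a recipe, assuming a local copy of the file:

    import json

    cfg = json.load(open("DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/config.json"))
    quant = cfg["quantization"]

    # Per-module specs are the dict-valued entries; ints/false/null are not.
    per_module = {k: v["bits"] for k, v in quant.items() if isinstance(v, dict)}
    mean_bits = sum(per_module.values()) / len(per_module)
    # Raw weight bits only; per-group scales and biases add roughly
    # 32 / group_size = 0.5 extra bits per weight at group_size 64.
    print(f"{len(per_module)} quantized matrices, mean {mean_bits:.2f} bits")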
DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/config.json DELETED
@@ -1,1790 +0,0 @@
- {
-   "architectures": [
-     "Qwen2ForCausalLM"
-   ],
-   "attention_dropout": 0.0,
-   "bos_token_id": 151643,
-   "eos_token_id": 151643,
-   "hidden_act": "silu",
-   "hidden_size": 3584,
-   "initializer_range": 0.02,
-   "intermediate_size": 18944,
-   "max_position_embeddings": 131072,
-   "max_window_layers": 28,
-   "model_type": "qwen2",
-   "num_attention_heads": 28,
-   "num_hidden_layers": 28,
-   "num_key_value_heads": 4,
-   "quantization": {
-     "group_size": 64,
-     "bits": null,
-     "model.embed_tokens": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.0.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.0.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.0.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.0.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.0.self_attn.rope": false,
-     "model.layers.0.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.0.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.0.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.0.input_layernorm": false,
-     "model.layers.0.post_attention_layernorm": false,
-     "model.layers.1.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.1.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.1.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.1.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.1.self_attn.rope": false,
-     "model.layers.1.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.1.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.1.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.1.input_layernorm": false,
-     "model.layers.1.post_attention_layernorm": false,
-     "model.layers.2.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.2.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.2.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.2.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.2.self_attn.rope": false,
-     "model.layers.2.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.2.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.2.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.2.input_layernorm": false,
-     "model.layers.2.post_attention_layernorm": false,
-     "model.layers.3.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.3.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.3.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.3.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.3.self_attn.rope": false,
-     "model.layers.3.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.3.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.3.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.3.input_layernorm": false,
-     "model.layers.3.post_attention_layernorm": false,
-     "model.layers.4.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.4.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.4.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.4.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.4.self_attn.rope": false,
-     "model.layers.4.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.4.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.4.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.4.input_layernorm": false,
-     "model.layers.4.post_attention_layernorm": false,
-     "model.layers.5.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.5.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.5.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.5.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.5.self_attn.rope": false,
-     "model.layers.5.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.5.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.5.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.5.input_layernorm": false,
-     "model.layers.5.post_attention_layernorm": false,
-     "model.layers.6.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.6.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.6.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.6.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.6.self_attn.rope": false,
-     "model.layers.6.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.6.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.6.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.6.input_layernorm": false,
-     "model.layers.6.post_attention_layernorm": false,
-     "model.layers.7.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.7.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.7.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.7.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.7.self_attn.rope": false,
-     "model.layers.7.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.7.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.7.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.7.input_layernorm": false,
-     "model.layers.7.post_attention_layernorm": false,
-     "model.layers.8.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.8.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.8.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.8.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.8.self_attn.rope": false,
-     "model.layers.8.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.8.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.8.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.8.input_layernorm": false,
-     "model.layers.8.post_attention_layernorm": false,
-     "model.layers.9.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.9.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.9.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.9.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.9.self_attn.rope": false,
-     "model.layers.9.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.9.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.9.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.9.input_layernorm": false,
-     "model.layers.9.post_attention_layernorm": false,
-     "model.layers.10.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.10.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.10.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.10.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.10.self_attn.rope": false,
-     "model.layers.10.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.10.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.10.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.10.input_layernorm": false,
-     "model.layers.10.post_attention_layernorm": false,
-     "model.layers.11.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.11.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.11.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.11.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.11.self_attn.rope": false,
-     "model.layers.11.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.11.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 8
-     },
-     "model.layers.11.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.11.input_layernorm": false,
-     "model.layers.11.post_attention_layernorm": false,
-     "model.layers.12.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.12.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.12.self_attn.v_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.12.self_attn.o_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.12.self_attn.rope": false,
-     "model.layers.12.mlp.gate_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.12.mlp.down_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.12.mlp.up_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.12.input_layernorm": false,
-     "model.layers.12.post_attention_layernorm": false,
-     "model.layers.13.self_attn.q_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.13.self_attn.k_proj": {
-       "group_size": 64,
-       "bits": 4
-     },
-     "model.layers.13.self_attn.v_proj": {
-       "group_size": 64,
438
- "bits": 4
439
- },
440
- "model.layers.13.self_attn.o_proj": {
441
- "group_size": 64,
442
- "bits": 4
443
- },
444
- "model.layers.13.self_attn.rope": false,
445
- "model.layers.13.mlp.gate_proj": {
446
- "group_size": 64,
447
- "bits": 4
448
- },
449
- "model.layers.13.mlp.down_proj": {
450
- "group_size": 64,
451
- "bits": 4
452
- },
453
- "model.layers.13.mlp.up_proj": {
454
- "group_size": 64,
455
- "bits": 4
456
- },
457
- "model.layers.13.input_layernorm": false,
458
- "model.layers.13.post_attention_layernorm": false,
459
- "model.layers.14.self_attn.q_proj": {
460
- "group_size": 64,
461
- "bits": 4
462
- },
463
- "model.layers.14.self_attn.k_proj": {
464
- "group_size": 64,
465
- "bits": 4
466
- },
467
- "model.layers.14.self_attn.v_proj": {
468
- "group_size": 64,
469
- "bits": 8
470
- },
471
- "model.layers.14.self_attn.o_proj": {
472
- "group_size": 64,
473
- "bits": 4
474
- },
475
- "model.layers.14.self_attn.rope": false,
476
- "model.layers.14.mlp.gate_proj": {
477
- "group_size": 64,
478
- "bits": 4
479
- },
480
- "model.layers.14.mlp.down_proj": {
481
- "group_size": 64,
482
- "bits": 8
483
- },
484
- "model.layers.14.mlp.up_proj": {
485
- "group_size": 64,
486
- "bits": 4
487
- },
488
- "model.layers.14.input_layernorm": false,
489
- "model.layers.14.post_attention_layernorm": false,
490
- "model.layers.15.self_attn.q_proj": {
491
- "group_size": 64,
492
- "bits": 4
493
- },
494
- "model.layers.15.self_attn.k_proj": {
495
- "group_size": 64,
496
- "bits": 4
497
- },
498
- "model.layers.15.self_attn.v_proj": {
499
- "group_size": 64,
500
- "bits": 4
501
- },
502
- "model.layers.15.self_attn.o_proj": {
503
- "group_size": 64,
504
- "bits": 4
505
- },
506
- "model.layers.15.self_attn.rope": false,
507
- "model.layers.15.mlp.gate_proj": {
508
- "group_size": 64,
509
- "bits": 4
510
- },
511
- "model.layers.15.mlp.down_proj": {
512
- "group_size": 64,
513
- "bits": 4
514
- },
515
- "model.layers.15.mlp.up_proj": {
516
- "group_size": 64,
517
- "bits": 4
518
- },
519
- "model.layers.15.input_layernorm": false,
520
- "model.layers.15.post_attention_layernorm": false,
521
- "model.layers.16.self_attn.q_proj": {
522
- "group_size": 64,
523
- "bits": 4
524
- },
525
- "model.layers.16.self_attn.k_proj": {
526
- "group_size": 64,
527
- "bits": 4
528
- },
529
- "model.layers.16.self_attn.v_proj": {
530
- "group_size": 64,
531
- "bits": 4
532
- },
533
- "model.layers.16.self_attn.o_proj": {
534
- "group_size": 64,
535
- "bits": 4
536
- },
537
- "model.layers.16.self_attn.rope": false,
538
- "model.layers.16.mlp.gate_proj": {
539
- "group_size": 64,
540
- "bits": 4
541
- },
542
- "model.layers.16.mlp.down_proj": {
543
- "group_size": 64,
544
- "bits": 4
545
- },
546
- "model.layers.16.mlp.up_proj": {
547
- "group_size": 64,
548
- "bits": 4
549
- },
550
- "model.layers.16.input_layernorm": false,
551
- "model.layers.16.post_attention_layernorm": false,
552
- "model.layers.17.self_attn.q_proj": {
553
- "group_size": 64,
554
- "bits": 4
555
- },
556
- "model.layers.17.self_attn.k_proj": {
557
- "group_size": 64,
558
- "bits": 4
559
- },
560
- "model.layers.17.self_attn.v_proj": {
561
- "group_size": 64,
562
- "bits": 8
563
- },
564
- "model.layers.17.self_attn.o_proj": {
565
- "group_size": 64,
566
- "bits": 4
567
- },
568
- "model.layers.17.self_attn.rope": false,
569
- "model.layers.17.mlp.gate_proj": {
570
- "group_size": 64,
571
- "bits": 4
572
- },
573
- "model.layers.17.mlp.down_proj": {
574
- "group_size": 64,
575
- "bits": 8
576
- },
577
- "model.layers.17.mlp.up_proj": {
578
- "group_size": 64,
579
- "bits": 4
580
- },
581
- "model.layers.17.input_layernorm": false,
582
- "model.layers.17.post_attention_layernorm": false,
583
- "model.layers.18.self_attn.q_proj": {
584
- "group_size": 64,
585
- "bits": 4
586
- },
587
- "model.layers.18.self_attn.k_proj": {
588
- "group_size": 64,
589
- "bits": 4
590
- },
591
- "model.layers.18.self_attn.v_proj": {
592
- "group_size": 64,
593
- "bits": 4
594
- },
595
- "model.layers.18.self_attn.o_proj": {
596
- "group_size": 64,
597
- "bits": 4
598
- },
599
- "model.layers.18.self_attn.rope": false,
600
- "model.layers.18.mlp.gate_proj": {
601
- "group_size": 64,
602
- "bits": 4
603
- },
604
- "model.layers.18.mlp.down_proj": {
605
- "group_size": 64,
606
- "bits": 4
607
- },
608
- "model.layers.18.mlp.up_proj": {
609
- "group_size": 64,
610
- "bits": 4
611
- },
612
- "model.layers.18.input_layernorm": false,
613
- "model.layers.18.post_attention_layernorm": false,
614
- "model.layers.19.self_attn.q_proj": {
615
- "group_size": 64,
616
- "bits": 4
617
- },
618
- "model.layers.19.self_attn.k_proj": {
619
- "group_size": 64,
620
- "bits": 4
621
- },
622
- "model.layers.19.self_attn.v_proj": {
623
- "group_size": 64,
624
- "bits": 4
625
- },
626
- "model.layers.19.self_attn.o_proj": {
627
- "group_size": 64,
628
- "bits": 4
629
- },
630
- "model.layers.19.self_attn.rope": false,
631
- "model.layers.19.mlp.gate_proj": {
632
- "group_size": 64,
633
- "bits": 4
634
- },
635
- "model.layers.19.mlp.down_proj": {
636
- "group_size": 64,
637
- "bits": 4
638
- },
639
- "model.layers.19.mlp.up_proj": {
640
- "group_size": 64,
641
- "bits": 4
642
- },
643
- "model.layers.19.input_layernorm": false,
644
- "model.layers.19.post_attention_layernorm": false,
645
- "model.layers.20.self_attn.q_proj": {
646
- "group_size": 64,
647
- "bits": 4
648
- },
649
- "model.layers.20.self_attn.k_proj": {
650
- "group_size": 64,
651
- "bits": 4
652
- },
653
- "model.layers.20.self_attn.v_proj": {
654
- "group_size": 64,
655
- "bits": 8
656
- },
657
- "model.layers.20.self_attn.o_proj": {
658
- "group_size": 64,
659
- "bits": 4
660
- },
661
- "model.layers.20.self_attn.rope": false,
662
- "model.layers.20.mlp.gate_proj": {
663
- "group_size": 64,
664
- "bits": 4
665
- },
666
- "model.layers.20.mlp.down_proj": {
667
- "group_size": 64,
668
- "bits": 8
669
- },
670
- "model.layers.20.mlp.up_proj": {
671
- "group_size": 64,
672
- "bits": 4
673
- },
674
- "model.layers.20.input_layernorm": false,
675
- "model.layers.20.post_attention_layernorm": false,
676
- "model.layers.21.self_attn.q_proj": {
677
- "group_size": 64,
678
- "bits": 4
679
- },
680
- "model.layers.21.self_attn.k_proj": {
681
- "group_size": 64,
682
- "bits": 4
683
- },
684
- "model.layers.21.self_attn.v_proj": {
685
- "group_size": 64,
686
- "bits": 4
687
- },
688
- "model.layers.21.self_attn.o_proj": {
689
- "group_size": 64,
690
- "bits": 4
691
- },
692
- "model.layers.21.self_attn.rope": false,
693
- "model.layers.21.mlp.gate_proj": {
694
- "group_size": 64,
695
- "bits": 4
696
- },
697
- "model.layers.21.mlp.down_proj": {
698
- "group_size": 64,
699
- "bits": 4
700
- },
701
- "model.layers.21.mlp.up_proj": {
702
- "group_size": 64,
703
- "bits": 4
704
- },
705
- "model.layers.21.input_layernorm": false,
706
- "model.layers.21.post_attention_layernorm": false,
707
- "model.layers.22.self_attn.q_proj": {
708
- "group_size": 64,
709
- "bits": 4
710
- },
711
- "model.layers.22.self_attn.k_proj": {
712
- "group_size": 64,
713
- "bits": 4
714
- },
715
- "model.layers.22.self_attn.v_proj": {
716
- "group_size": 64,
717
- "bits": 4
718
- },
719
- "model.layers.22.self_attn.o_proj": {
720
- "group_size": 64,
721
- "bits": 4
722
- },
723
- "model.layers.22.self_attn.rope": false,
724
- "model.layers.22.mlp.gate_proj": {
725
- "group_size": 64,
726
- "bits": 4
727
- },
728
- "model.layers.22.mlp.down_proj": {
729
- "group_size": 64,
730
- "bits": 4
731
- },
732
- "model.layers.22.mlp.up_proj": {
733
- "group_size": 64,
734
- "bits": 4
735
- },
736
- "model.layers.22.input_layernorm": false,
737
- "model.layers.22.post_attention_layernorm": false,
738
- "model.layers.23.self_attn.q_proj": {
739
- "group_size": 64,
740
- "bits": 4
741
- },
742
- "model.layers.23.self_attn.k_proj": {
743
- "group_size": 64,
744
- "bits": 4
745
- },
746
- "model.layers.23.self_attn.v_proj": {
747
- "group_size": 64,
748
- "bits": 8
749
- },
750
- "model.layers.23.self_attn.o_proj": {
751
- "group_size": 64,
752
- "bits": 4
753
- },
754
- "model.layers.23.self_attn.rope": false,
755
- "model.layers.23.mlp.gate_proj": {
756
- "group_size": 64,
757
- "bits": 4
758
- },
759
- "model.layers.23.mlp.down_proj": {
760
- "group_size": 64,
761
- "bits": 8
762
- },
763
- "model.layers.23.mlp.up_proj": {
764
- "group_size": 64,
765
- "bits": 4
766
- },
767
- "model.layers.23.input_layernorm": false,
768
- "model.layers.23.post_attention_layernorm": false,
769
- "model.layers.24.self_attn.q_proj": {
770
- "group_size": 64,
771
- "bits": 4
772
- },
773
- "model.layers.24.self_attn.k_proj": {
774
- "group_size": 64,
775
- "bits": 4
776
- },
777
- "model.layers.24.self_attn.v_proj": {
778
- "group_size": 64,
779
- "bits": 8
780
- },
781
- "model.layers.24.self_attn.o_proj": {
782
- "group_size": 64,
783
- "bits": 4
784
- },
785
- "model.layers.24.self_attn.rope": false,
786
- "model.layers.24.mlp.gate_proj": {
787
- "group_size": 64,
788
- "bits": 4
789
- },
790
- "model.layers.24.mlp.down_proj": {
791
- "group_size": 64,
792
- "bits": 8
793
- },
794
- "model.layers.24.mlp.up_proj": {
795
- "group_size": 64,
796
- "bits": 4
797
- },
798
- "model.layers.24.input_layernorm": false,
799
- "model.layers.24.post_attention_layernorm": false,
800
- "model.layers.25.self_attn.q_proj": {
801
- "group_size": 64,
802
- "bits": 4
803
- },
804
- "model.layers.25.self_attn.k_proj": {
805
- "group_size": 64,
806
- "bits": 4
807
- },
808
- "model.layers.25.self_attn.v_proj": {
809
- "group_size": 64,
810
- "bits": 8
811
- },
812
- "model.layers.25.self_attn.o_proj": {
813
- "group_size": 64,
814
- "bits": 4
815
- },
816
- "model.layers.25.self_attn.rope": false,
817
- "model.layers.25.mlp.gate_proj": {
818
- "group_size": 64,
819
- "bits": 4
820
- },
821
- "model.layers.25.mlp.down_proj": {
822
- "group_size": 64,
823
- "bits": 8
824
- },
825
- "model.layers.25.mlp.up_proj": {
826
- "group_size": 64,
827
- "bits": 4
828
- },
829
- "model.layers.25.input_layernorm": false,
830
- "model.layers.25.post_attention_layernorm": false,
831
- "model.layers.26.self_attn.q_proj": {
832
- "group_size": 64,
833
- "bits": 4
834
- },
835
- "model.layers.26.self_attn.k_proj": {
836
- "group_size": 64,
837
- "bits": 4
838
- },
839
- "model.layers.26.self_attn.v_proj": {
840
- "group_size": 64,
841
- "bits": 8
842
- },
843
- "model.layers.26.self_attn.o_proj": {
844
- "group_size": 64,
845
- "bits": 4
846
- },
847
- "model.layers.26.self_attn.rope": false,
848
- "model.layers.26.mlp.gate_proj": {
849
- "group_size": 64,
850
- "bits": 4
851
- },
852
- "model.layers.26.mlp.down_proj": {
853
- "group_size": 64,
854
- "bits": 8
855
- },
856
- "model.layers.26.mlp.up_proj": {
857
- "group_size": 64,
858
- "bits": 4
859
- },
860
- "model.layers.26.input_layernorm": false,
861
- "model.layers.26.post_attention_layernorm": false,
862
- "model.layers.27.self_attn.q_proj": {
863
- "group_size": 64,
864
- "bits": 4
865
- },
866
- "model.layers.27.self_attn.k_proj": {
867
- "group_size": 64,
868
- "bits": 4
869
- },
870
- "model.layers.27.self_attn.v_proj": {
871
- "group_size": 64,
872
- "bits": 8
873
- },
874
- "model.layers.27.self_attn.o_proj": {
875
- "group_size": 64,
876
- "bits": 4
877
- },
878
- "model.layers.27.self_attn.rope": false,
879
- "model.layers.27.mlp.gate_proj": {
880
- "group_size": 64,
881
- "bits": 4
882
- },
883
- "model.layers.27.mlp.down_proj": {
884
- "group_size": 64,
885
- "bits": 8
886
- },
887
- "model.layers.27.mlp.up_proj": {
888
- "group_size": 64,
889
- "bits": 4
890
- },
891
- "model.layers.27.input_layernorm": false,
892
- "model.layers.27.post_attention_layernorm": false,
893
- "model.norm": false,
894
- "lm_head": {
895
- "group_size": 64,
896
- "bits": 8
897
- }
898
- },
899
- "quantization_config": {
900
- "group_size": 64,
901
- "bits": null,
902
- "model.embed_tokens": {
903
- "group_size": 64,
904
- "bits": 4
905
- },
906
- "model.layers.0.self_attn.q_proj": {
907
- "group_size": 64,
908
- "bits": 4
909
- },
910
- "model.layers.0.self_attn.k_proj": {
911
- "group_size": 64,
912
- "bits": 4
913
- },
914
- "model.layers.0.self_attn.v_proj": {
915
- "group_size": 64,
916
- "bits": 8
917
- },
918
- "model.layers.0.self_attn.o_proj": {
919
- "group_size": 64,
920
- "bits": 4
921
- },
922
- "model.layers.0.self_attn.rope": false,
923
- "model.layers.0.mlp.gate_proj": {
924
- "group_size": 64,
925
- "bits": 4
926
- },
927
- "model.layers.0.mlp.down_proj": {
928
- "group_size": 64,
929
- "bits": 8
930
- },
931
- "model.layers.0.mlp.up_proj": {
932
- "group_size": 64,
933
- "bits": 4
934
- },
935
- "model.layers.0.input_layernorm": false,
936
- "model.layers.0.post_attention_layernorm": false,
937
- "model.layers.1.self_attn.q_proj": {
938
- "group_size": 64,
939
- "bits": 4
940
- },
941
- "model.layers.1.self_attn.k_proj": {
942
- "group_size": 64,
943
- "bits": 4
944
- },
945
- "model.layers.1.self_attn.v_proj": {
946
- "group_size": 64,
947
- "bits": 8
948
- },
949
- "model.layers.1.self_attn.o_proj": {
950
- "group_size": 64,
951
- "bits": 4
952
- },
953
- "model.layers.1.self_attn.rope": false,
954
- "model.layers.1.mlp.gate_proj": {
955
- "group_size": 64,
956
- "bits": 4
957
- },
958
- "model.layers.1.mlp.down_proj": {
959
- "group_size": 64,
960
- "bits": 8
961
- },
962
- "model.layers.1.mlp.up_proj": {
963
- "group_size": 64,
964
- "bits": 4
965
- },
966
- "model.layers.1.input_layernorm": false,
967
- "model.layers.1.post_attention_layernorm": false,
968
- "model.layers.2.self_attn.q_proj": {
969
- "group_size": 64,
970
- "bits": 4
971
- },
972
- "model.layers.2.self_attn.k_proj": {
973
- "group_size": 64,
974
- "bits": 4
975
- },
976
- "model.layers.2.self_attn.v_proj": {
977
- "group_size": 64,
978
- "bits": 8
979
- },
980
- "model.layers.2.self_attn.o_proj": {
981
- "group_size": 64,
982
- "bits": 4
983
- },
984
- "model.layers.2.self_attn.rope": false,
985
- "model.layers.2.mlp.gate_proj": {
986
- "group_size": 64,
987
- "bits": 4
988
- },
989
- "model.layers.2.mlp.down_proj": {
990
- "group_size": 64,
991
- "bits": 8
992
- },
993
- "model.layers.2.mlp.up_proj": {
994
- "group_size": 64,
995
- "bits": 4
996
- },
997
- "model.layers.2.input_layernorm": false,
998
- "model.layers.2.post_attention_layernorm": false,
999
- "model.layers.3.self_attn.q_proj": {
1000
- "group_size": 64,
1001
- "bits": 4
1002
- },
1003
- "model.layers.3.self_attn.k_proj": {
1004
- "group_size": 64,
1005
- "bits": 4
1006
- },
1007
- "model.layers.3.self_attn.v_proj": {
1008
- "group_size": 64,
1009
- "bits": 4
1010
- },
1011
- "model.layers.3.self_attn.o_proj": {
1012
- "group_size": 64,
1013
- "bits": 4
1014
- },
1015
- "model.layers.3.self_attn.rope": false,
1016
- "model.layers.3.mlp.gate_proj": {
1017
- "group_size": 64,
1018
- "bits": 4
1019
- },
1020
- "model.layers.3.mlp.down_proj": {
1021
- "group_size": 64,
1022
- "bits": 4
1023
- },
1024
- "model.layers.3.mlp.up_proj": {
1025
- "group_size": 64,
1026
- "bits": 4
1027
- },
1028
- "model.layers.3.input_layernorm": false,
1029
- "model.layers.3.post_attention_layernorm": false,
1030
- "model.layers.4.self_attn.q_proj": {
1031
- "group_size": 64,
1032
- "bits": 4
1033
- },
1034
- "model.layers.4.self_attn.k_proj": {
1035
- "group_size": 64,
1036
- "bits": 4
1037
- },
1038
- "model.layers.4.self_attn.v_proj": {
1039
- "group_size": 64,
1040
- "bits": 4
1041
- },
1042
- "model.layers.4.self_attn.o_proj": {
1043
- "group_size": 64,
1044
- "bits": 4
1045
- },
1046
- "model.layers.4.self_attn.rope": false,
1047
- "model.layers.4.mlp.gate_proj": {
1048
- "group_size": 64,
1049
- "bits": 4
1050
- },
1051
- "model.layers.4.mlp.down_proj": {
1052
- "group_size": 64,
1053
- "bits": 4
1054
- },
1055
- "model.layers.4.mlp.up_proj": {
1056
- "group_size": 64,
1057
- "bits": 4
1058
- },
1059
- "model.layers.4.input_layernorm": false,
1060
- "model.layers.4.post_attention_layernorm": false,
1061
- "model.layers.5.self_attn.q_proj": {
1062
- "group_size": 64,
1063
- "bits": 4
1064
- },
1065
- "model.layers.5.self_attn.k_proj": {
1066
- "group_size": 64,
1067
- "bits": 4
1068
- },
1069
- "model.layers.5.self_attn.v_proj": {
1070
- "group_size": 64,
1071
- "bits": 8
1072
- },
1073
- "model.layers.5.self_attn.o_proj": {
1074
- "group_size": 64,
1075
- "bits": 4
1076
- },
1077
- "model.layers.5.self_attn.rope": false,
1078
- "model.layers.5.mlp.gate_proj": {
1079
- "group_size": 64,
1080
- "bits": 4
1081
- },
1082
- "model.layers.5.mlp.down_proj": {
1083
- "group_size": 64,
1084
- "bits": 8
1085
- },
1086
- "model.layers.5.mlp.up_proj": {
1087
- "group_size": 64,
1088
- "bits": 4
1089
- },
1090
- "model.layers.5.input_layernorm": false,
1091
- "model.layers.5.post_attention_layernorm": false,
1092
- "model.layers.6.self_attn.q_proj": {
1093
- "group_size": 64,
1094
- "bits": 4
1095
- },
1096
- "model.layers.6.self_attn.k_proj": {
1097
- "group_size": 64,
1098
- "bits": 4
1099
- },
1100
- "model.layers.6.self_attn.v_proj": {
1101
- "group_size": 64,
1102
- "bits": 4
1103
- },
1104
- "model.layers.6.self_attn.o_proj": {
1105
- "group_size": 64,
1106
- "bits": 4
1107
- },
1108
- "model.layers.6.self_attn.rope": false,
1109
- "model.layers.6.mlp.gate_proj": {
1110
- "group_size": 64,
1111
- "bits": 4
1112
- },
1113
- "model.layers.6.mlp.down_proj": {
1114
- "group_size": 64,
1115
- "bits": 4
1116
- },
1117
- "model.layers.6.mlp.up_proj": {
1118
- "group_size": 64,
1119
- "bits": 4
1120
- },
1121
- "model.layers.6.input_layernorm": false,
1122
- "model.layers.6.post_attention_layernorm": false,
1123
- "model.layers.7.self_attn.q_proj": {
1124
- "group_size": 64,
1125
- "bits": 4
1126
- },
1127
- "model.layers.7.self_attn.k_proj": {
1128
- "group_size": 64,
1129
- "bits": 4
1130
- },
1131
- "model.layers.7.self_attn.v_proj": {
1132
- "group_size": 64,
1133
- "bits": 4
1134
- },
1135
- "model.layers.7.self_attn.o_proj": {
1136
- "group_size": 64,
1137
- "bits": 4
1138
- },
1139
- "model.layers.7.self_attn.rope": false,
1140
- "model.layers.7.mlp.gate_proj": {
1141
- "group_size": 64,
1142
- "bits": 4
1143
- },
1144
- "model.layers.7.mlp.down_proj": {
1145
- "group_size": 64,
1146
- "bits": 4
1147
- },
1148
- "model.layers.7.mlp.up_proj": {
1149
- "group_size": 64,
1150
- "bits": 4
1151
- },
1152
- "model.layers.7.input_layernorm": false,
1153
- "model.layers.7.post_attention_layernorm": false,
1154
- "model.layers.8.self_attn.q_proj": {
1155
- "group_size": 64,
1156
- "bits": 4
1157
- },
1158
- "model.layers.8.self_attn.k_proj": {
1159
- "group_size": 64,
1160
- "bits": 4
1161
- },
1162
- "model.layers.8.self_attn.v_proj": {
1163
- "group_size": 64,
1164
- "bits": 8
1165
- },
1166
- "model.layers.8.self_attn.o_proj": {
1167
- "group_size": 64,
1168
- "bits": 4
1169
- },
1170
- "model.layers.8.self_attn.rope": false,
1171
- "model.layers.8.mlp.gate_proj": {
1172
- "group_size": 64,
1173
- "bits": 4
1174
- },
1175
- "model.layers.8.mlp.down_proj": {
1176
- "group_size": 64,
1177
- "bits": 8
1178
- },
1179
- "model.layers.8.mlp.up_proj": {
1180
- "group_size": 64,
1181
- "bits": 4
1182
- },
1183
- "model.layers.8.input_layernorm": false,
1184
- "model.layers.8.post_attention_layernorm": false,
1185
- "model.layers.9.self_attn.q_proj": {
1186
- "group_size": 64,
1187
- "bits": 4
1188
- },
1189
- "model.layers.9.self_attn.k_proj": {
1190
- "group_size": 64,
1191
- "bits": 4
1192
- },
1193
- "model.layers.9.self_attn.v_proj": {
1194
- "group_size": 64,
1195
- "bits": 4
1196
- },
1197
- "model.layers.9.self_attn.o_proj": {
1198
- "group_size": 64,
1199
- "bits": 4
1200
- },
1201
- "model.layers.9.self_attn.rope": false,
1202
- "model.layers.9.mlp.gate_proj": {
1203
- "group_size": 64,
1204
- "bits": 4
1205
- },
1206
- "model.layers.9.mlp.down_proj": {
1207
- "group_size": 64,
1208
- "bits": 4
1209
- },
1210
- "model.layers.9.mlp.up_proj": {
1211
- "group_size": 64,
1212
- "bits": 4
1213
- },
1214
- "model.layers.9.input_layernorm": false,
1215
- "model.layers.9.post_attention_layernorm": false,
1216
- "model.layers.10.self_attn.q_proj": {
1217
- "group_size": 64,
1218
- "bits": 4
1219
- },
1220
- "model.layers.10.self_attn.k_proj": {
1221
- "group_size": 64,
1222
- "bits": 4
1223
- },
1224
- "model.layers.10.self_attn.v_proj": {
1225
- "group_size": 64,
1226
- "bits": 4
1227
- },
1228
- "model.layers.10.self_attn.o_proj": {
1229
- "group_size": 64,
1230
- "bits": 4
1231
- },
1232
- "model.layers.10.self_attn.rope": false,
1233
- "model.layers.10.mlp.gate_proj": {
1234
- "group_size": 64,
1235
- "bits": 4
1236
- },
1237
- "model.layers.10.mlp.down_proj": {
1238
- "group_size": 64,
1239
- "bits": 4
1240
- },
1241
- "model.layers.10.mlp.up_proj": {
1242
- "group_size": 64,
1243
- "bits": 4
1244
- },
1245
- "model.layers.10.input_layernorm": false,
1246
- "model.layers.10.post_attention_layernorm": false,
1247
- "model.layers.11.self_attn.q_proj": {
1248
- "group_size": 64,
1249
- "bits": 4
1250
- },
1251
- "model.layers.11.self_attn.k_proj": {
1252
- "group_size": 64,
1253
- "bits": 4
1254
- },
1255
- "model.layers.11.self_attn.v_proj": {
1256
- "group_size": 64,
1257
- "bits": 8
1258
- },
1259
- "model.layers.11.self_attn.o_proj": {
1260
- "group_size": 64,
1261
- "bits": 4
1262
- },
1263
- "model.layers.11.self_attn.rope": false,
1264
- "model.layers.11.mlp.gate_proj": {
1265
- "group_size": 64,
1266
- "bits": 4
1267
- },
1268
- "model.layers.11.mlp.down_proj": {
1269
- "group_size": 64,
1270
- "bits": 8
1271
- },
1272
- "model.layers.11.mlp.up_proj": {
1273
- "group_size": 64,
1274
- "bits": 4
1275
- },
1276
- "model.layers.11.input_layernorm": false,
1277
- "model.layers.11.post_attention_layernorm": false,
1278
- "model.layers.12.self_attn.q_proj": {
1279
- "group_size": 64,
1280
- "bits": 4
1281
- },
1282
- "model.layers.12.self_attn.k_proj": {
1283
- "group_size": 64,
1284
- "bits": 4
1285
- },
1286
- "model.layers.12.self_attn.v_proj": {
1287
- "group_size": 64,
1288
- "bits": 4
1289
- },
1290
- "model.layers.12.self_attn.o_proj": {
1291
- "group_size": 64,
1292
- "bits": 4
1293
- },
1294
- "model.layers.12.self_attn.rope": false,
1295
- "model.layers.12.mlp.gate_proj": {
1296
- "group_size": 64,
1297
- "bits": 4
1298
- },
1299
- "model.layers.12.mlp.down_proj": {
1300
- "group_size": 64,
1301
- "bits": 4
1302
- },
1303
- "model.layers.12.mlp.up_proj": {
1304
- "group_size": 64,
1305
- "bits": 4
1306
- },
1307
- "model.layers.12.input_layernorm": false,
1308
- "model.layers.12.post_attention_layernorm": false,
1309
- "model.layers.13.self_attn.q_proj": {
1310
- "group_size": 64,
1311
- "bits": 4
1312
- },
1313
- "model.layers.13.self_attn.k_proj": {
1314
- "group_size": 64,
1315
- "bits": 4
1316
- },
1317
- "model.layers.13.self_attn.v_proj": {
1318
- "group_size": 64,
1319
- "bits": 4
1320
- },
1321
- "model.layers.13.self_attn.o_proj": {
1322
- "group_size": 64,
1323
- "bits": 4
1324
- },
1325
- "model.layers.13.self_attn.rope": false,
1326
- "model.layers.13.mlp.gate_proj": {
1327
- "group_size": 64,
1328
- "bits": 4
1329
- },
1330
- "model.layers.13.mlp.down_proj": {
1331
- "group_size": 64,
1332
- "bits": 4
1333
- },
1334
- "model.layers.13.mlp.up_proj": {
1335
- "group_size": 64,
1336
- "bits": 4
1337
- },
1338
- "model.layers.13.input_layernorm": false,
1339
- "model.layers.13.post_attention_layernorm": false,
1340
- "model.layers.14.self_attn.q_proj": {
1341
- "group_size": 64,
1342
- "bits": 4
1343
- },
1344
- "model.layers.14.self_attn.k_proj": {
1345
- "group_size": 64,
1346
- "bits": 4
1347
- },
1348
- "model.layers.14.self_attn.v_proj": {
1349
- "group_size": 64,
1350
- "bits": 8
1351
- },
1352
- "model.layers.14.self_attn.o_proj": {
1353
- "group_size": 64,
1354
- "bits": 4
1355
- },
1356
- "model.layers.14.self_attn.rope": false,
1357
- "model.layers.14.mlp.gate_proj": {
1358
- "group_size": 64,
1359
- "bits": 4
1360
- },
1361
- "model.layers.14.mlp.down_proj": {
1362
- "group_size": 64,
1363
- "bits": 8
1364
- },
1365
- "model.layers.14.mlp.up_proj": {
1366
- "group_size": 64,
1367
- "bits": 4
1368
- },
1369
- "model.layers.14.input_layernorm": false,
1370
- "model.layers.14.post_attention_layernorm": false,
1371
- "model.layers.15.self_attn.q_proj": {
1372
- "group_size": 64,
1373
- "bits": 4
1374
- },
1375
- "model.layers.15.self_attn.k_proj": {
1376
- "group_size": 64,
1377
- "bits": 4
1378
- },
1379
- "model.layers.15.self_attn.v_proj": {
1380
- "group_size": 64,
1381
- "bits": 4
1382
- },
1383
- "model.layers.15.self_attn.o_proj": {
1384
- "group_size": 64,
1385
- "bits": 4
1386
- },
1387
- "model.layers.15.self_attn.rope": false,
1388
- "model.layers.15.mlp.gate_proj": {
1389
- "group_size": 64,
1390
- "bits": 4
1391
- },
1392
- "model.layers.15.mlp.down_proj": {
1393
- "group_size": 64,
1394
- "bits": 4
1395
- },
1396
- "model.layers.15.mlp.up_proj": {
1397
- "group_size": 64,
1398
- "bits": 4
1399
- },
1400
- "model.layers.15.input_layernorm": false,
1401
- "model.layers.15.post_attention_layernorm": false,
1402
- "model.layers.16.self_attn.q_proj": {
1403
- "group_size": 64,
1404
- "bits": 4
1405
- },
1406
- "model.layers.16.self_attn.k_proj": {
1407
- "group_size": 64,
1408
- "bits": 4
1409
- },
1410
- "model.layers.16.self_attn.v_proj": {
1411
- "group_size": 64,
1412
- "bits": 4
1413
- },
1414
- "model.layers.16.self_attn.o_proj": {
1415
- "group_size": 64,
1416
- "bits": 4
1417
- },
1418
- "model.layers.16.self_attn.rope": false,
1419
- "model.layers.16.mlp.gate_proj": {
1420
- "group_size": 64,
1421
- "bits": 4
1422
- },
1423
- "model.layers.16.mlp.down_proj": {
1424
- "group_size": 64,
1425
- "bits": 4
1426
- },
1427
- "model.layers.16.mlp.up_proj": {
1428
- "group_size": 64,
1429
- "bits": 4
1430
- },
1431
- "model.layers.16.input_layernorm": false,
1432
- "model.layers.16.post_attention_layernorm": false,
1433
- "model.layers.17.self_attn.q_proj": {
1434
- "group_size": 64,
1435
- "bits": 4
1436
- },
1437
- "model.layers.17.self_attn.k_proj": {
1438
- "group_size": 64,
1439
- "bits": 4
1440
- },
1441
- "model.layers.17.self_attn.v_proj": {
1442
- "group_size": 64,
1443
- "bits": 8
1444
- },
1445
- "model.layers.17.self_attn.o_proj": {
1446
- "group_size": 64,
1447
- "bits": 4
1448
- },
1449
- "model.layers.17.self_attn.rope": false,
1450
- "model.layers.17.mlp.gate_proj": {
1451
- "group_size": 64,
1452
- "bits": 4
1453
- },
1454
- "model.layers.17.mlp.down_proj": {
1455
- "group_size": 64,
1456
- "bits": 8
1457
- },
1458
- "model.layers.17.mlp.up_proj": {
1459
- "group_size": 64,
1460
- "bits": 4
1461
- },
1462
- "model.layers.17.input_layernorm": false,
1463
- "model.layers.17.post_attention_layernorm": false,
1464
- "model.layers.18.self_attn.q_proj": {
1465
- "group_size": 64,
1466
- "bits": 4
1467
- },
1468
- "model.layers.18.self_attn.k_proj": {
1469
- "group_size": 64,
1470
- "bits": 4
1471
- },
1472
- "model.layers.18.self_attn.v_proj": {
1473
- "group_size": 64,
1474
- "bits": 4
1475
- },
1476
- "model.layers.18.self_attn.o_proj": {
1477
- "group_size": 64,
1478
- "bits": 4
1479
- },
1480
- "model.layers.18.self_attn.rope": false,
1481
- "model.layers.18.mlp.gate_proj": {
1482
- "group_size": 64,
1483
- "bits": 4
1484
- },
1485
- "model.layers.18.mlp.down_proj": {
1486
- "group_size": 64,
1487
- "bits": 4
1488
- },
1489
- "model.layers.18.mlp.up_proj": {
1490
- "group_size": 64,
1491
- "bits": 4
1492
- },
1493
- "model.layers.18.input_layernorm": false,
1494
- "model.layers.18.post_attention_layernorm": false,
1495
- "model.layers.19.self_attn.q_proj": {
1496
- "group_size": 64,
1497
- "bits": 4
1498
- },
1499
- "model.layers.19.self_attn.k_proj": {
1500
- "group_size": 64,
1501
- "bits": 4
1502
- },
1503
- "model.layers.19.self_attn.v_proj": {
1504
- "group_size": 64,
1505
- "bits": 4
1506
- },
1507
- "model.layers.19.self_attn.o_proj": {
1508
- "group_size": 64,
1509
- "bits": 4
1510
- },
1511
- "model.layers.19.self_attn.rope": false,
1512
- "model.layers.19.mlp.gate_proj": {
1513
- "group_size": 64,
1514
- "bits": 4
1515
- },
1516
- "model.layers.19.mlp.down_proj": {
1517
- "group_size": 64,
1518
- "bits": 4
1519
- },
1520
- "model.layers.19.mlp.up_proj": {
1521
- "group_size": 64,
1522
- "bits": 4
1523
- },
1524
- "model.layers.19.input_layernorm": false,
1525
- "model.layers.19.post_attention_layernorm": false,
1526
- "model.layers.20.self_attn.q_proj": {
1527
- "group_size": 64,
1528
- "bits": 4
1529
- },
1530
- "model.layers.20.self_attn.k_proj": {
1531
- "group_size": 64,
1532
- "bits": 4
1533
- },
1534
- "model.layers.20.self_attn.v_proj": {
1535
- "group_size": 64,
1536
- "bits": 8
1537
- },
1538
- "model.layers.20.self_attn.o_proj": {
1539
- "group_size": 64,
1540
- "bits": 4
1541
- },
1542
- "model.layers.20.self_attn.rope": false,
1543
- "model.layers.20.mlp.gate_proj": {
1544
- "group_size": 64,
1545
- "bits": 4
1546
- },
1547
- "model.layers.20.mlp.down_proj": {
1548
- "group_size": 64,
1549
- "bits": 8
1550
- },
1551
- "model.layers.20.mlp.up_proj": {
1552
- "group_size": 64,
1553
- "bits": 4
1554
- },
1555
- "model.layers.20.input_layernorm": false,
1556
- "model.layers.20.post_attention_layernorm": false,
1557
- "model.layers.21.self_attn.q_proj": {
1558
- "group_size": 64,
1559
- "bits": 4
1560
- },
1561
- "model.layers.21.self_attn.k_proj": {
1562
- "group_size": 64,
1563
- "bits": 4
1564
- },
1565
- "model.layers.21.self_attn.v_proj": {
1566
- "group_size": 64,
1567
- "bits": 4
1568
- },
1569
- "model.layers.21.self_attn.o_proj": {
1570
- "group_size": 64,
1571
- "bits": 4
1572
- },
1573
- "model.layers.21.self_attn.rope": false,
1574
- "model.layers.21.mlp.gate_proj": {
1575
- "group_size": 64,
1576
- "bits": 4
1577
- },
1578
- "model.layers.21.mlp.down_proj": {
1579
- "group_size": 64,
1580
- "bits": 4
1581
- },
1582
- "model.layers.21.mlp.up_proj": {
1583
- "group_size": 64,
1584
- "bits": 4
1585
- },
1586
- "model.layers.21.input_layernorm": false,
1587
- "model.layers.21.post_attention_layernorm": false,
1588
- "model.layers.22.self_attn.q_proj": {
1589
- "group_size": 64,
1590
- "bits": 4
1591
- },
1592
- "model.layers.22.self_attn.k_proj": {
1593
- "group_size": 64,
1594
- "bits": 4
1595
- },
1596
- "model.layers.22.self_attn.v_proj": {
1597
- "group_size": 64,
1598
- "bits": 4
1599
- },
1600
- "model.layers.22.self_attn.o_proj": {
1601
- "group_size": 64,
1602
- "bits": 4
1603
- },
1604
- "model.layers.22.self_attn.rope": false,
1605
- "model.layers.22.mlp.gate_proj": {
1606
- "group_size": 64,
1607
- "bits": 4
1608
- },
1609
- "model.layers.22.mlp.down_proj": {
1610
- "group_size": 64,
1611
- "bits": 4
1612
- },
1613
- "model.layers.22.mlp.up_proj": {
1614
- "group_size": 64,
1615
- "bits": 4
1616
- },
1617
- "model.layers.22.input_layernorm": false,
1618
- "model.layers.22.post_attention_layernorm": false,
1619
- "model.layers.23.self_attn.q_proj": {
1620
- "group_size": 64,
1621
- "bits": 4
1622
- },
1623
- "model.layers.23.self_attn.k_proj": {
1624
- "group_size": 64,
1625
- "bits": 4
1626
- },
1627
- "model.layers.23.self_attn.v_proj": {
1628
- "group_size": 64,
1629
- "bits": 8
1630
- },
1631
- "model.layers.23.self_attn.o_proj": {
1632
- "group_size": 64,
1633
- "bits": 4
1634
- },
1635
- "model.layers.23.self_attn.rope": false,
1636
- "model.layers.23.mlp.gate_proj": {
1637
- "group_size": 64,
1638
- "bits": 4
1639
- },
1640
- "model.layers.23.mlp.down_proj": {
1641
- "group_size": 64,
1642
- "bits": 8
1643
- },
1644
- "model.layers.23.mlp.up_proj": {
1645
- "group_size": 64,
1646
- "bits": 4
1647
- },
1648
- "model.layers.23.input_layernorm": false,
1649
- "model.layers.23.post_attention_layernorm": false,
1650
- "model.layers.24.self_attn.q_proj": {
1651
- "group_size": 64,
1652
- "bits": 4
1653
- },
1654
- "model.layers.24.self_attn.k_proj": {
1655
- "group_size": 64,
1656
- "bits": 4
1657
- },
1658
- "model.layers.24.self_attn.v_proj": {
1659
- "group_size": 64,
1660
- "bits": 8
1661
- },
1662
- "model.layers.24.self_attn.o_proj": {
1663
- "group_size": 64,
1664
- "bits": 4
1665
- },
1666
- "model.layers.24.self_attn.rope": false,
1667
- "model.layers.24.mlp.gate_proj": {
1668
- "group_size": 64,
1669
- "bits": 4
1670
- },
1671
- "model.layers.24.mlp.down_proj": {
1672
- "group_size": 64,
1673
- "bits": 8
1674
- },
1675
- "model.layers.24.mlp.up_proj": {
1676
- "group_size": 64,
1677
- "bits": 4
1678
- },
1679
- "model.layers.24.input_layernorm": false,
1680
- "model.layers.24.post_attention_layernorm": false,
1681
- "model.layers.25.self_attn.q_proj": {
1682
- "group_size": 64,
1683
- "bits": 4
1684
- },
1685
- "model.layers.25.self_attn.k_proj": {
1686
- "group_size": 64,
1687
- "bits": 4
1688
- },
1689
- "model.layers.25.self_attn.v_proj": {
1690
- "group_size": 64,
1691
- "bits": 8
1692
- },
1693
- "model.layers.25.self_attn.o_proj": {
1694
- "group_size": 64,
1695
- "bits": 4
1696
- },
1697
- "model.layers.25.self_attn.rope": false,
1698
- "model.layers.25.mlp.gate_proj": {
1699
- "group_size": 64,
1700
- "bits": 4
1701
- },
1702
- "model.layers.25.mlp.down_proj": {
1703
- "group_size": 64,
1704
- "bits": 8
1705
- },
1706
- "model.layers.25.mlp.up_proj": {
1707
- "group_size": 64,
1708
- "bits": 4
1709
- },
1710
- "model.layers.25.input_layernorm": false,
1711
- "model.layers.25.post_attention_layernorm": false,
1712
- "model.layers.26.self_attn.q_proj": {
1713
- "group_size": 64,
1714
- "bits": 4
1715
- },
1716
- "model.layers.26.self_attn.k_proj": {
1717
- "group_size": 64,
1718
- "bits": 4
1719
- },
1720
- "model.layers.26.self_attn.v_proj": {
1721
- "group_size": 64,
1722
- "bits": 8
1723
- },
1724
- "model.layers.26.self_attn.o_proj": {
1725
- "group_size": 64,
1726
- "bits": 4
1727
- },
1728
- "model.layers.26.self_attn.rope": false,
1729
- "model.layers.26.mlp.gate_proj": {
1730
- "group_size": 64,
1731
- "bits": 4
1732
- },
1733
- "model.layers.26.mlp.down_proj": {
1734
- "group_size": 64,
1735
- "bits": 8
1736
- },
1737
- "model.layers.26.mlp.up_proj": {
1738
- "group_size": 64,
1739
- "bits": 4
1740
- },
1741
- "model.layers.26.input_layernorm": false,
1742
- "model.layers.26.post_attention_layernorm": false,
1743
- "model.layers.27.self_attn.q_proj": {
1744
- "group_size": 64,
1745
- "bits": 4
1746
- },
1747
- "model.layers.27.self_attn.k_proj": {
1748
- "group_size": 64,
1749
- "bits": 4
1750
- },
1751
- "model.layers.27.self_attn.v_proj": {
1752
- "group_size": 64,
1753
- "bits": 8
1754
- },
1755
- "model.layers.27.self_attn.o_proj": {
1756
- "group_size": 64,
1757
- "bits": 4
1758
- },
1759
- "model.layers.27.self_attn.rope": false,
1760
- "model.layers.27.mlp.gate_proj": {
1761
- "group_size": 64,
1762
- "bits": 4
1763
- },
1764
- "model.layers.27.mlp.down_proj": {
1765
- "group_size": 64,
1766
- "bits": 8
1767
- },
1768
- "model.layers.27.mlp.up_proj": {
1769
- "group_size": 64,
1770
- "bits": 4
1771
- },
1772
- "model.layers.27.input_layernorm": false,
1773
- "model.layers.27.post_attention_layernorm": false,
1774
- "model.norm": false,
1775
- "lm_head": {
1776
- "group_size": 64,
1777
- "bits": 8
1778
- }
1779
- },
1780
- "rms_norm_eps": 1e-06,
1781
- "rope_theta": 10000,
1782
- "sliding_window": 4096,
1783
- "tie_word_embeddings": false,
1784
- "torch_dtype": "bfloat16",
1785
- "transformers_version": "4.44.0",
1786
- "use_cache": true,
1787
- "use_mrope": false,
1788
- "use_sliding_window": false,
1789
- "vocab_size": 152064
1790
- }
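Note on the deleted config above: the 4,8 mix follows a regular pattern. Only v_proj and down_proj are promoted to 8 bits, and only in the first three layers, the last four layers, and every third layer in between; everything else (including the embeddings) stays at 4 bits with group size 64, and lm_head is kept at 8 bits. The sketch below shows how a config like this can be generated with mlx-lm's quantization predicate hook. It is a reconstruction inferred from the deleted config.json, not the exact recipe used for this repo, and it assumes a recent mlx-lm version whose convert() accepts a quant_predicate callable returning either a bool or a {"group_size", "bits"} override.

    # Sketch: reproduce the 4,8_mixed pattern visible in the deleted config.json.
    # Assumption: mlx-lm exposes convert(..., quant_predicate=...) with a
    # (path, module, config) -> bool | dict signature.
    from mlx_lm import convert

    GROUP_SIZE = 64
    LOW_BITS, HIGH_BITS = 4, 8

    def mixed_4_8(path, module, config):
        if "lm_head" in path:
            return {"group_size": GROUP_SIZE, "bits": HIGH_BITS}
        if "v_proj" in path or "down_proj" in path:
            parts = path.split(".")
            if len(parts) > 2 and parts[1] == "layers":
                idx = int(parts[2])
                n = config["num_hidden_layers"]  # 28 for the 7B distill
                # 8-bit on layers 0-2, the last four layers, and every third
                # layer in between; matches the v_proj/down_proj entries above.
                if idx < 3 or idx >= n - 4 or idx % 3 == 2:
                    return {"group_size": GROUP_SIZE, "bits": HIGH_BITS}
        return {"group_size": GROUP_SIZE, "bits": LOW_BITS}

    convert(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
        mlx_path="DeepSeek-R1-Distill-Qwen-7B-4,8_mixed",
        quantize=True,
        q_group_size=GROUP_SIZE,
        q_bits=LOW_BITS,
        quant_predicate=mixed_4_8,
    )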
DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:88ee9eaed0352955850f54891597506220fd2b358cfd0039d46dea9500049067
- size 5044957181
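(The deleted file above is a Git LFS pointer; the ~5.04 GB weight blob itself lives in LFS storage. For reference, a quant folder like this one is consumed directly by mlx-lm; a minimal usage sketch, assuming the local folder name below:)

    # Load one of these MLX quants and generate a completion.
    from mlx_lm import load, generate

    model, tokenizer = load("DeepSeek-R1-Distill-Qwen-7B-4,8_mixed")
    print(generate(model, tokenizer, prompt="What is 2+2?", max_tokens=64))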
 
 
 
 
DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/model.safetensors.index.json DELETED
@@ -1,742 +0,0 @@
- {
- "metadata": {
- "total_size": 5044874240
- },
- "weight_map": {
- "lm_head.biases": "model.safetensors",
- "lm_head.scales": "model.safetensors",
- "lm_head.weight": "model.safetensors",
- "model.embed_tokens.biases": "model.safetensors",
- "model.embed_tokens.scales": "model.safetensors",
- "model.embed_tokens.weight": "model.safetensors",
- "model.layers.0.input_layernorm.weight": "model.safetensors",
- "model.layers.0.mlp.down_proj.biases": "model.safetensors",
- "model.layers.0.mlp.down_proj.scales": "model.safetensors",
- "model.layers.0.mlp.down_proj.weight": "model.safetensors",
- "model.layers.0.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.0.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.0.mlp.up_proj.biases": "model.safetensors",
- "model.layers.0.mlp.up_proj.scales": "model.safetensors",
- "model.layers.0.mlp.up_proj.weight": "model.safetensors",
- "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.0.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.1.input_layernorm.weight": "model.safetensors",
- "model.layers.1.mlp.down_proj.biases": "model.safetensors",
- "model.layers.1.mlp.down_proj.scales": "model.safetensors",
- "model.layers.1.mlp.down_proj.weight": "model.safetensors",
- "model.layers.1.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.1.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.1.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.1.mlp.up_proj.biases": "model.safetensors",
- "model.layers.1.mlp.up_proj.scales": "model.safetensors",
- "model.layers.1.mlp.up_proj.weight": "model.safetensors",
- "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.1.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.10.input_layernorm.weight": "model.safetensors",
- "model.layers.10.mlp.down_proj.biases": "model.safetensors",
- "model.layers.10.mlp.down_proj.scales": "model.safetensors",
- "model.layers.10.mlp.down_proj.weight": "model.safetensors",
- "model.layers.10.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.10.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.10.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.10.mlp.up_proj.biases": "model.safetensors",
- "model.layers.10.mlp.up_proj.scales": "model.safetensors",
- "model.layers.10.mlp.up_proj.weight": "model.safetensors",
- "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.10.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.11.input_layernorm.weight": "model.safetensors",
- "model.layers.11.mlp.down_proj.biases": "model.safetensors",
- "model.layers.11.mlp.down_proj.scales": "model.safetensors",
- "model.layers.11.mlp.down_proj.weight": "model.safetensors",
- "model.layers.11.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.11.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.11.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.11.mlp.up_proj.biases": "model.safetensors",
- "model.layers.11.mlp.up_proj.scales": "model.safetensors",
- "model.layers.11.mlp.up_proj.weight": "model.safetensors",
- "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.11.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.12.input_layernorm.weight": "model.safetensors",
- "model.layers.12.mlp.down_proj.biases": "model.safetensors",
- "model.layers.12.mlp.down_proj.scales": "model.safetensors",
- "model.layers.12.mlp.down_proj.weight": "model.safetensors",
- "model.layers.12.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.12.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.12.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.12.mlp.up_proj.biases": "model.safetensors",
- "model.layers.12.mlp.up_proj.scales": "model.safetensors",
- "model.layers.12.mlp.up_proj.weight": "model.safetensors",
- "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.12.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.13.input_layernorm.weight": "model.safetensors",
- "model.layers.13.mlp.down_proj.biases": "model.safetensors",
- "model.layers.13.mlp.down_proj.scales": "model.safetensors",
- "model.layers.13.mlp.down_proj.weight": "model.safetensors",
- "model.layers.13.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.13.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.13.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.13.mlp.up_proj.biases": "model.safetensors",
- "model.layers.13.mlp.up_proj.scales": "model.safetensors",
- "model.layers.13.mlp.up_proj.weight": "model.safetensors",
- "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.13.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.13.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.13.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.13.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.13.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.13.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.13.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.14.input_layernorm.weight": "model.safetensors",
- "model.layers.14.mlp.down_proj.biases": "model.safetensors",
- "model.layers.14.mlp.down_proj.scales": "model.safetensors",
- "model.layers.14.mlp.down_proj.weight": "model.safetensors",
- "model.layers.14.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.14.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.14.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.14.mlp.up_proj.biases": "model.safetensors",
- "model.layers.14.mlp.up_proj.scales": "model.safetensors",
- "model.layers.14.mlp.up_proj.weight": "model.safetensors",
- "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.14.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.14.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.14.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.14.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.14.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.14.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.14.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.15.input_layernorm.weight": "model.safetensors",
- "model.layers.15.mlp.down_proj.biases": "model.safetensors",
- "model.layers.15.mlp.down_proj.scales": "model.safetensors",
- "model.layers.15.mlp.down_proj.weight": "model.safetensors",
- "model.layers.15.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.15.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.15.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.15.mlp.up_proj.biases": "model.safetensors",
- "model.layers.15.mlp.up_proj.scales": "model.safetensors",
- "model.layers.15.mlp.up_proj.weight": "model.safetensors",
- "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.15.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.15.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.15.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.15.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.15.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.15.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.15.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.15.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.15.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.15.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.15.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.15.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.15.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.16.input_layernorm.weight": "model.safetensors",
- "model.layers.16.mlp.down_proj.biases": "model.safetensors",
- "model.layers.16.mlp.down_proj.scales": "model.safetensors",
- "model.layers.16.mlp.down_proj.weight": "model.safetensors",
- "model.layers.16.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.16.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.16.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.16.mlp.up_proj.biases": "model.safetensors",
- "model.layers.16.mlp.up_proj.scales": "model.safetensors",
- "model.layers.16.mlp.up_proj.weight": "model.safetensors",
- "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.16.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.16.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.16.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.16.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.16.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.16.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.16.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.16.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.16.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.16.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.16.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.16.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.16.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.17.input_layernorm.weight": "model.safetensors",
- "model.layers.17.mlp.down_proj.biases": "model.safetensors",
- "model.layers.17.mlp.down_proj.scales": "model.safetensors",
- "model.layers.17.mlp.down_proj.weight": "model.safetensors",
- "model.layers.17.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.17.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.17.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.17.mlp.up_proj.biases": "model.safetensors",
- "model.layers.17.mlp.up_proj.scales": "model.safetensors",
- "model.layers.17.mlp.up_proj.weight": "model.safetensors",
- "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.17.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.17.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.17.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.17.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.17.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.17.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.17.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.17.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.17.self_attn.q_proj.weight": "model.safetensors",
268
- "model.layers.17.self_attn.v_proj.bias": "model.safetensors",
269
- "model.layers.17.self_attn.v_proj.biases": "model.safetensors",
270
- "model.layers.17.self_attn.v_proj.scales": "model.safetensors",
271
- "model.layers.17.self_attn.v_proj.weight": "model.safetensors",
272
- "model.layers.18.input_layernorm.weight": "model.safetensors",
273
- "model.layers.18.mlp.down_proj.biases": "model.safetensors",
274
- "model.layers.18.mlp.down_proj.scales": "model.safetensors",
275
- "model.layers.18.mlp.down_proj.weight": "model.safetensors",
276
- "model.layers.18.mlp.gate_proj.biases": "model.safetensors",
277
- "model.layers.18.mlp.gate_proj.scales": "model.safetensors",
278
- "model.layers.18.mlp.gate_proj.weight": "model.safetensors",
279
- "model.layers.18.mlp.up_proj.biases": "model.safetensors",
280
- "model.layers.18.mlp.up_proj.scales": "model.safetensors",
281
- "model.layers.18.mlp.up_proj.weight": "model.safetensors",
282
- "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
283
- "model.layers.18.self_attn.k_proj.bias": "model.safetensors",
284
- "model.layers.18.self_attn.k_proj.biases": "model.safetensors",
285
- "model.layers.18.self_attn.k_proj.scales": "model.safetensors",
286
- "model.layers.18.self_attn.k_proj.weight": "model.safetensors",
287
- "model.layers.18.self_attn.o_proj.biases": "model.safetensors",
288
- "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
289
- "model.layers.18.self_attn.o_proj.weight": "model.safetensors",
290
- "model.layers.18.self_attn.q_proj.bias": "model.safetensors",
291
- "model.layers.18.self_attn.q_proj.biases": "model.safetensors",
292
- "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
293
- "model.layers.18.self_attn.q_proj.weight": "model.safetensors",
294
- "model.layers.18.self_attn.v_proj.bias": "model.safetensors",
295
- "model.layers.18.self_attn.v_proj.biases": "model.safetensors",
296
- "model.layers.18.self_attn.v_proj.scales": "model.safetensors",
297
- "model.layers.18.self_attn.v_proj.weight": "model.safetensors",
298
- "model.layers.19.input_layernorm.weight": "model.safetensors",
299
- "model.layers.19.mlp.down_proj.biases": "model.safetensors",
300
- "model.layers.19.mlp.down_proj.scales": "model.safetensors",
301
- "model.layers.19.mlp.down_proj.weight": "model.safetensors",
302
- "model.layers.19.mlp.gate_proj.biases": "model.safetensors",
303
- "model.layers.19.mlp.gate_proj.scales": "model.safetensors",
304
- "model.layers.19.mlp.gate_proj.weight": "model.safetensors",
305
- "model.layers.19.mlp.up_proj.biases": "model.safetensors",
306
- "model.layers.19.mlp.up_proj.scales": "model.safetensors",
307
- "model.layers.19.mlp.up_proj.weight": "model.safetensors",
308
- "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
309
- "model.layers.19.self_attn.k_proj.bias": "model.safetensors",
310
- "model.layers.19.self_attn.k_proj.biases": "model.safetensors",
311
- "model.layers.19.self_attn.k_proj.scales": "model.safetensors",
312
- "model.layers.19.self_attn.k_proj.weight": "model.safetensors",
313
- "model.layers.19.self_attn.o_proj.biases": "model.safetensors",
314
- "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
315
- "model.layers.19.self_attn.o_proj.weight": "model.safetensors",
316
- "model.layers.19.self_attn.q_proj.bias": "model.safetensors",
317
- "model.layers.19.self_attn.q_proj.biases": "model.safetensors",
318
- "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
319
- "model.layers.19.self_attn.q_proj.weight": "model.safetensors",
320
- "model.layers.19.self_attn.v_proj.bias": "model.safetensors",
321
- "model.layers.19.self_attn.v_proj.biases": "model.safetensors",
322
- "model.layers.19.self_attn.v_proj.scales": "model.safetensors",
323
- "model.layers.19.self_attn.v_proj.weight": "model.safetensors",
324
- "model.layers.2.input_layernorm.weight": "model.safetensors",
325
- "model.layers.2.mlp.down_proj.biases": "model.safetensors",
326
- "model.layers.2.mlp.down_proj.scales": "model.safetensors",
327
- "model.layers.2.mlp.down_proj.weight": "model.safetensors",
328
- "model.layers.2.mlp.gate_proj.biases": "model.safetensors",
329
- "model.layers.2.mlp.gate_proj.scales": "model.safetensors",
330
- "model.layers.2.mlp.gate_proj.weight": "model.safetensors",
331
- "model.layers.2.mlp.up_proj.biases": "model.safetensors",
332
- "model.layers.2.mlp.up_proj.scales": "model.safetensors",
333
- "model.layers.2.mlp.up_proj.weight": "model.safetensors",
334
- "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
335
- "model.layers.2.self_attn.k_proj.bias": "model.safetensors",
336
- "model.layers.2.self_attn.k_proj.biases": "model.safetensors",
337
- "model.layers.2.self_attn.k_proj.scales": "model.safetensors",
338
- "model.layers.2.self_attn.k_proj.weight": "model.safetensors",
339
- "model.layers.2.self_attn.o_proj.biases": "model.safetensors",
340
- "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
341
- "model.layers.2.self_attn.o_proj.weight": "model.safetensors",
342
- "model.layers.2.self_attn.q_proj.bias": "model.safetensors",
343
- "model.layers.2.self_attn.q_proj.biases": "model.safetensors",
344
- "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
345
- "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
346
- "model.layers.2.self_attn.v_proj.bias": "model.safetensors",
347
- "model.layers.2.self_attn.v_proj.biases": "model.safetensors",
348
- "model.layers.2.self_attn.v_proj.scales": "model.safetensors",
349
- "model.layers.2.self_attn.v_proj.weight": "model.safetensors",
350
- "model.layers.20.input_layernorm.weight": "model.safetensors",
351
- "model.layers.20.mlp.down_proj.biases": "model.safetensors",
352
- "model.layers.20.mlp.down_proj.scales": "model.safetensors",
353
- "model.layers.20.mlp.down_proj.weight": "model.safetensors",
354
- "model.layers.20.mlp.gate_proj.biases": "model.safetensors",
355
- "model.layers.20.mlp.gate_proj.scales": "model.safetensors",
356
- "model.layers.20.mlp.gate_proj.weight": "model.safetensors",
357
- "model.layers.20.mlp.up_proj.biases": "model.safetensors",
358
- "model.layers.20.mlp.up_proj.scales": "model.safetensors",
359
- "model.layers.20.mlp.up_proj.weight": "model.safetensors",
360
- "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
361
- "model.layers.20.self_attn.k_proj.bias": "model.safetensors",
362
- "model.layers.20.self_attn.k_proj.biases": "model.safetensors",
363
- "model.layers.20.self_attn.k_proj.scales": "model.safetensors",
364
- "model.layers.20.self_attn.k_proj.weight": "model.safetensors",
365
- "model.layers.20.self_attn.o_proj.biases": "model.safetensors",
366
- "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
367
- "model.layers.20.self_attn.o_proj.weight": "model.safetensors",
368
- "model.layers.20.self_attn.q_proj.bias": "model.safetensors",
369
- "model.layers.20.self_attn.q_proj.biases": "model.safetensors",
370
- "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
371
- "model.layers.20.self_attn.q_proj.weight": "model.safetensors",
372
- "model.layers.20.self_attn.v_proj.bias": "model.safetensors",
373
- "model.layers.20.self_attn.v_proj.biases": "model.safetensors",
374
- "model.layers.20.self_attn.v_proj.scales": "model.safetensors",
375
- "model.layers.20.self_attn.v_proj.weight": "model.safetensors",
376
- "model.layers.21.input_layernorm.weight": "model.safetensors",
377
- "model.layers.21.mlp.down_proj.biases": "model.safetensors",
378
- "model.layers.21.mlp.down_proj.scales": "model.safetensors",
379
- "model.layers.21.mlp.down_proj.weight": "model.safetensors",
380
- "model.layers.21.mlp.gate_proj.biases": "model.safetensors",
381
- "model.layers.21.mlp.gate_proj.scales": "model.safetensors",
382
- "model.layers.21.mlp.gate_proj.weight": "model.safetensors",
383
- "model.layers.21.mlp.up_proj.biases": "model.safetensors",
384
- "model.layers.21.mlp.up_proj.scales": "model.safetensors",
385
- "model.layers.21.mlp.up_proj.weight": "model.safetensors",
386
- "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
387
- "model.layers.21.self_attn.k_proj.bias": "model.safetensors",
388
- "model.layers.21.self_attn.k_proj.biases": "model.safetensors",
389
- "model.layers.21.self_attn.k_proj.scales": "model.safetensors",
390
- "model.layers.21.self_attn.k_proj.weight": "model.safetensors",
391
- "model.layers.21.self_attn.o_proj.biases": "model.safetensors",
392
- "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
393
- "model.layers.21.self_attn.o_proj.weight": "model.safetensors",
394
- "model.layers.21.self_attn.q_proj.bias": "model.safetensors",
395
- "model.layers.21.self_attn.q_proj.biases": "model.safetensors",
396
- "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
397
- "model.layers.21.self_attn.q_proj.weight": "model.safetensors",
398
- "model.layers.21.self_attn.v_proj.bias": "model.safetensors",
399
- "model.layers.21.self_attn.v_proj.biases": "model.safetensors",
400
- "model.layers.21.self_attn.v_proj.scales": "model.safetensors",
401
- "model.layers.21.self_attn.v_proj.weight": "model.safetensors",
402
- "model.layers.22.input_layernorm.weight": "model.safetensors",
403
- "model.layers.22.mlp.down_proj.biases": "model.safetensors",
404
- "model.layers.22.mlp.down_proj.scales": "model.safetensors",
405
- "model.layers.22.mlp.down_proj.weight": "model.safetensors",
406
- "model.layers.22.mlp.gate_proj.biases": "model.safetensors",
407
- "model.layers.22.mlp.gate_proj.scales": "model.safetensors",
408
- "model.layers.22.mlp.gate_proj.weight": "model.safetensors",
409
- "model.layers.22.mlp.up_proj.biases": "model.safetensors",
410
- "model.layers.22.mlp.up_proj.scales": "model.safetensors",
411
- "model.layers.22.mlp.up_proj.weight": "model.safetensors",
412
- "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
413
- "model.layers.22.self_attn.k_proj.bias": "model.safetensors",
414
- "model.layers.22.self_attn.k_proj.biases": "model.safetensors",
415
- "model.layers.22.self_attn.k_proj.scales": "model.safetensors",
416
- "model.layers.22.self_attn.k_proj.weight": "model.safetensors",
417
- "model.layers.22.self_attn.o_proj.biases": "model.safetensors",
418
- "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
419
- "model.layers.22.self_attn.o_proj.weight": "model.safetensors",
420
- "model.layers.22.self_attn.q_proj.bias": "model.safetensors",
421
- "model.layers.22.self_attn.q_proj.biases": "model.safetensors",
422
- "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
423
- "model.layers.22.self_attn.q_proj.weight": "model.safetensors",
424
- "model.layers.22.self_attn.v_proj.bias": "model.safetensors",
425
- "model.layers.22.self_attn.v_proj.biases": "model.safetensors",
426
- "model.layers.22.self_attn.v_proj.scales": "model.safetensors",
427
- "model.layers.22.self_attn.v_proj.weight": "model.safetensors",
428
- "model.layers.23.input_layernorm.weight": "model.safetensors",
429
- "model.layers.23.mlp.down_proj.biases": "model.safetensors",
430
- "model.layers.23.mlp.down_proj.scales": "model.safetensors",
431
- "model.layers.23.mlp.down_proj.weight": "model.safetensors",
432
- "model.layers.23.mlp.gate_proj.biases": "model.safetensors",
433
- "model.layers.23.mlp.gate_proj.scales": "model.safetensors",
434
- "model.layers.23.mlp.gate_proj.weight": "model.safetensors",
435
- "model.layers.23.mlp.up_proj.biases": "model.safetensors",
436
- "model.layers.23.mlp.up_proj.scales": "model.safetensors",
437
- "model.layers.23.mlp.up_proj.weight": "model.safetensors",
438
- "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
439
- "model.layers.23.self_attn.k_proj.bias": "model.safetensors",
440
- "model.layers.23.self_attn.k_proj.biases": "model.safetensors",
441
- "model.layers.23.self_attn.k_proj.scales": "model.safetensors",
442
- "model.layers.23.self_attn.k_proj.weight": "model.safetensors",
443
- "model.layers.23.self_attn.o_proj.biases": "model.safetensors",
444
- "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
445
- "model.layers.23.self_attn.o_proj.weight": "model.safetensors",
446
- "model.layers.23.self_attn.q_proj.bias": "model.safetensors",
447
- "model.layers.23.self_attn.q_proj.biases": "model.safetensors",
448
- "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
449
- "model.layers.23.self_attn.q_proj.weight": "model.safetensors",
450
- "model.layers.23.self_attn.v_proj.bias": "model.safetensors",
451
- "model.layers.23.self_attn.v_proj.biases": "model.safetensors",
452
- "model.layers.23.self_attn.v_proj.scales": "model.safetensors",
453
- "model.layers.23.self_attn.v_proj.weight": "model.safetensors",
454
- "model.layers.24.input_layernorm.weight": "model.safetensors",
455
- "model.layers.24.mlp.down_proj.biases": "model.safetensors",
456
- "model.layers.24.mlp.down_proj.scales": "model.safetensors",
457
- "model.layers.24.mlp.down_proj.weight": "model.safetensors",
458
- "model.layers.24.mlp.gate_proj.biases": "model.safetensors",
459
- "model.layers.24.mlp.gate_proj.scales": "model.safetensors",
460
- "model.layers.24.mlp.gate_proj.weight": "model.safetensors",
461
- "model.layers.24.mlp.up_proj.biases": "model.safetensors",
462
- "model.layers.24.mlp.up_proj.scales": "model.safetensors",
463
- "model.layers.24.mlp.up_proj.weight": "model.safetensors",
464
- "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
465
- "model.layers.24.self_attn.k_proj.bias": "model.safetensors",
466
- "model.layers.24.self_attn.k_proj.biases": "model.safetensors",
467
- "model.layers.24.self_attn.k_proj.scales": "model.safetensors",
468
- "model.layers.24.self_attn.k_proj.weight": "model.safetensors",
469
- "model.layers.24.self_attn.o_proj.biases": "model.safetensors",
470
- "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
471
- "model.layers.24.self_attn.o_proj.weight": "model.safetensors",
472
- "model.layers.24.self_attn.q_proj.bias": "model.safetensors",
473
- "model.layers.24.self_attn.q_proj.biases": "model.safetensors",
474
- "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
475
- "model.layers.24.self_attn.q_proj.weight": "model.safetensors",
476
- "model.layers.24.self_attn.v_proj.bias": "model.safetensors",
477
- "model.layers.24.self_attn.v_proj.biases": "model.safetensors",
478
- "model.layers.24.self_attn.v_proj.scales": "model.safetensors",
479
- "model.layers.24.self_attn.v_proj.weight": "model.safetensors",
480
- "model.layers.25.input_layernorm.weight": "model.safetensors",
481
- "model.layers.25.mlp.down_proj.biases": "model.safetensors",
482
- "model.layers.25.mlp.down_proj.scales": "model.safetensors",
483
- "model.layers.25.mlp.down_proj.weight": "model.safetensors",
484
- "model.layers.25.mlp.gate_proj.biases": "model.safetensors",
485
- "model.layers.25.mlp.gate_proj.scales": "model.safetensors",
486
- "model.layers.25.mlp.gate_proj.weight": "model.safetensors",
487
- "model.layers.25.mlp.up_proj.biases": "model.safetensors",
488
- "model.layers.25.mlp.up_proj.scales": "model.safetensors",
489
- "model.layers.25.mlp.up_proj.weight": "model.safetensors",
490
- "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
491
- "model.layers.25.self_attn.k_proj.bias": "model.safetensors",
492
- "model.layers.25.self_attn.k_proj.biases": "model.safetensors",
493
- "model.layers.25.self_attn.k_proj.scales": "model.safetensors",
494
- "model.layers.25.self_attn.k_proj.weight": "model.safetensors",
495
- "model.layers.25.self_attn.o_proj.biases": "model.safetensors",
496
- "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
497
- "model.layers.25.self_attn.o_proj.weight": "model.safetensors",
498
- "model.layers.25.self_attn.q_proj.bias": "model.safetensors",
499
- "model.layers.25.self_attn.q_proj.biases": "model.safetensors",
500
- "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
501
- "model.layers.25.self_attn.q_proj.weight": "model.safetensors",
502
- "model.layers.25.self_attn.v_proj.bias": "model.safetensors",
503
- "model.layers.25.self_attn.v_proj.biases": "model.safetensors",
504
- "model.layers.25.self_attn.v_proj.scales": "model.safetensors",
505
- "model.layers.25.self_attn.v_proj.weight": "model.safetensors",
506
- "model.layers.26.input_layernorm.weight": "model.safetensors",
507
- "model.layers.26.mlp.down_proj.biases": "model.safetensors",
508
- "model.layers.26.mlp.down_proj.scales": "model.safetensors",
509
- "model.layers.26.mlp.down_proj.weight": "model.safetensors",
510
- "model.layers.26.mlp.gate_proj.biases": "model.safetensors",
511
- "model.layers.26.mlp.gate_proj.scales": "model.safetensors",
512
- "model.layers.26.mlp.gate_proj.weight": "model.safetensors",
513
- "model.layers.26.mlp.up_proj.biases": "model.safetensors",
514
- "model.layers.26.mlp.up_proj.scales": "model.safetensors",
515
- "model.layers.26.mlp.up_proj.weight": "model.safetensors",
516
- "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
517
- "model.layers.26.self_attn.k_proj.bias": "model.safetensors",
518
- "model.layers.26.self_attn.k_proj.biases": "model.safetensors",
519
- "model.layers.26.self_attn.k_proj.scales": "model.safetensors",
520
- "model.layers.26.self_attn.k_proj.weight": "model.safetensors",
521
- "model.layers.26.self_attn.o_proj.biases": "model.safetensors",
522
- "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
523
- "model.layers.26.self_attn.o_proj.weight": "model.safetensors",
524
- "model.layers.26.self_attn.q_proj.bias": "model.safetensors",
525
- "model.layers.26.self_attn.q_proj.biases": "model.safetensors",
526
- "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
527
- "model.layers.26.self_attn.q_proj.weight": "model.safetensors",
528
- "model.layers.26.self_attn.v_proj.bias": "model.safetensors",
529
- "model.layers.26.self_attn.v_proj.biases": "model.safetensors",
530
- "model.layers.26.self_attn.v_proj.scales": "model.safetensors",
531
- "model.layers.26.self_attn.v_proj.weight": "model.safetensors",
532
- "model.layers.27.input_layernorm.weight": "model.safetensors",
533
- "model.layers.27.mlp.down_proj.biases": "model.safetensors",
534
- "model.layers.27.mlp.down_proj.scales": "model.safetensors",
535
- "model.layers.27.mlp.down_proj.weight": "model.safetensors",
536
- "model.layers.27.mlp.gate_proj.biases": "model.safetensors",
537
- "model.layers.27.mlp.gate_proj.scales": "model.safetensors",
538
- "model.layers.27.mlp.gate_proj.weight": "model.safetensors",
539
- "model.layers.27.mlp.up_proj.biases": "model.safetensors",
540
- "model.layers.27.mlp.up_proj.scales": "model.safetensors",
541
- "model.layers.27.mlp.up_proj.weight": "model.safetensors",
542
- "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
543
- "model.layers.27.self_attn.k_proj.bias": "model.safetensors",
544
- "model.layers.27.self_attn.k_proj.biases": "model.safetensors",
545
- "model.layers.27.self_attn.k_proj.scales": "model.safetensors",
546
- "model.layers.27.self_attn.k_proj.weight": "model.safetensors",
547
- "model.layers.27.self_attn.o_proj.biases": "model.safetensors",
548
- "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
549
- "model.layers.27.self_attn.o_proj.weight": "model.safetensors",
550
- "model.layers.27.self_attn.q_proj.bias": "model.safetensors",
551
- "model.layers.27.self_attn.q_proj.biases": "model.safetensors",
552
- "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
553
- "model.layers.27.self_attn.q_proj.weight": "model.safetensors",
554
- "model.layers.27.self_attn.v_proj.bias": "model.safetensors",
555
- "model.layers.27.self_attn.v_proj.biases": "model.safetensors",
556
- "model.layers.27.self_attn.v_proj.scales": "model.safetensors",
557
- "model.layers.27.self_attn.v_proj.weight": "model.safetensors",
558
- "model.layers.3.input_layernorm.weight": "model.safetensors",
559
- "model.layers.3.mlp.down_proj.biases": "model.safetensors",
560
- "model.layers.3.mlp.down_proj.scales": "model.safetensors",
561
- "model.layers.3.mlp.down_proj.weight": "model.safetensors",
562
- "model.layers.3.mlp.gate_proj.biases": "model.safetensors",
563
- "model.layers.3.mlp.gate_proj.scales": "model.safetensors",
564
- "model.layers.3.mlp.gate_proj.weight": "model.safetensors",
565
- "model.layers.3.mlp.up_proj.biases": "model.safetensors",
566
- "model.layers.3.mlp.up_proj.scales": "model.safetensors",
567
- "model.layers.3.mlp.up_proj.weight": "model.safetensors",
568
- "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
569
- "model.layers.3.self_attn.k_proj.bias": "model.safetensors",
570
- "model.layers.3.self_attn.k_proj.biases": "model.safetensors",
571
- "model.layers.3.self_attn.k_proj.scales": "model.safetensors",
572
- "model.layers.3.self_attn.k_proj.weight": "model.safetensors",
573
- "model.layers.3.self_attn.o_proj.biases": "model.safetensors",
574
- "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
575
- "model.layers.3.self_attn.o_proj.weight": "model.safetensors",
576
- "model.layers.3.self_attn.q_proj.bias": "model.safetensors",
577
- "model.layers.3.self_attn.q_proj.biases": "model.safetensors",
578
- "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
579
- "model.layers.3.self_attn.q_proj.weight": "model.safetensors",
580
- "model.layers.3.self_attn.v_proj.bias": "model.safetensors",
581
- "model.layers.3.self_attn.v_proj.biases": "model.safetensors",
582
- "model.layers.3.self_attn.v_proj.scales": "model.safetensors",
583
- "model.layers.3.self_attn.v_proj.weight": "model.safetensors",
584
- "model.layers.4.input_layernorm.weight": "model.safetensors",
585
- "model.layers.4.mlp.down_proj.biases": "model.safetensors",
586
- "model.layers.4.mlp.down_proj.scales": "model.safetensors",
587
- "model.layers.4.mlp.down_proj.weight": "model.safetensors",
588
- "model.layers.4.mlp.gate_proj.biases": "model.safetensors",
589
- "model.layers.4.mlp.gate_proj.scales": "model.safetensors",
590
- "model.layers.4.mlp.gate_proj.weight": "model.safetensors",
591
- "model.layers.4.mlp.up_proj.biases": "model.safetensors",
592
- "model.layers.4.mlp.up_proj.scales": "model.safetensors",
593
- "model.layers.4.mlp.up_proj.weight": "model.safetensors",
594
- "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
595
- "model.layers.4.self_attn.k_proj.bias": "model.safetensors",
596
- "model.layers.4.self_attn.k_proj.biases": "model.safetensors",
597
- "model.layers.4.self_attn.k_proj.scales": "model.safetensors",
598
- "model.layers.4.self_attn.k_proj.weight": "model.safetensors",
599
- "model.layers.4.self_attn.o_proj.biases": "model.safetensors",
600
- "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
601
- "model.layers.4.self_attn.o_proj.weight": "model.safetensors",
602
- "model.layers.4.self_attn.q_proj.bias": "model.safetensors",
603
- "model.layers.4.self_attn.q_proj.biases": "model.safetensors",
604
- "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
605
- "model.layers.4.self_attn.q_proj.weight": "model.safetensors",
606
- "model.layers.4.self_attn.v_proj.bias": "model.safetensors",
607
- "model.layers.4.self_attn.v_proj.biases": "model.safetensors",
608
- "model.layers.4.self_attn.v_proj.scales": "model.safetensors",
609
- "model.layers.4.self_attn.v_proj.weight": "model.safetensors",
610
- "model.layers.5.input_layernorm.weight": "model.safetensors",
611
- "model.layers.5.mlp.down_proj.biases": "model.safetensors",
612
- "model.layers.5.mlp.down_proj.scales": "model.safetensors",
613
- "model.layers.5.mlp.down_proj.weight": "model.safetensors",
614
- "model.layers.5.mlp.gate_proj.biases": "model.safetensors",
615
- "model.layers.5.mlp.gate_proj.scales": "model.safetensors",
616
- "model.layers.5.mlp.gate_proj.weight": "model.safetensors",
617
- "model.layers.5.mlp.up_proj.biases": "model.safetensors",
618
- "model.layers.5.mlp.up_proj.scales": "model.safetensors",
619
- "model.layers.5.mlp.up_proj.weight": "model.safetensors",
620
- "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
621
- "model.layers.5.self_attn.k_proj.bias": "model.safetensors",
622
- "model.layers.5.self_attn.k_proj.biases": "model.safetensors",
623
- "model.layers.5.self_attn.k_proj.scales": "model.safetensors",
624
- "model.layers.5.self_attn.k_proj.weight": "model.safetensors",
625
- "model.layers.5.self_attn.o_proj.biases": "model.safetensors",
626
- "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
627
- "model.layers.5.self_attn.o_proj.weight": "model.safetensors",
628
- "model.layers.5.self_attn.q_proj.bias": "model.safetensors",
629
- "model.layers.5.self_attn.q_proj.biases": "model.safetensors",
630
- "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
631
- "model.layers.5.self_attn.q_proj.weight": "model.safetensors",
632
- "model.layers.5.self_attn.v_proj.bias": "model.safetensors",
633
- "model.layers.5.self_attn.v_proj.biases": "model.safetensors",
634
- "model.layers.5.self_attn.v_proj.scales": "model.safetensors",
635
- "model.layers.5.self_attn.v_proj.weight": "model.safetensors",
636
- "model.layers.6.input_layernorm.weight": "model.safetensors",
637
- "model.layers.6.mlp.down_proj.biases": "model.safetensors",
638
- "model.layers.6.mlp.down_proj.scales": "model.safetensors",
639
- "model.layers.6.mlp.down_proj.weight": "model.safetensors",
640
- "model.layers.6.mlp.gate_proj.biases": "model.safetensors",
641
- "model.layers.6.mlp.gate_proj.scales": "model.safetensors",
642
- "model.layers.6.mlp.gate_proj.weight": "model.safetensors",
643
- "model.layers.6.mlp.up_proj.biases": "model.safetensors",
644
- "model.layers.6.mlp.up_proj.scales": "model.safetensors",
645
- "model.layers.6.mlp.up_proj.weight": "model.safetensors",
646
- "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
647
- "model.layers.6.self_attn.k_proj.bias": "model.safetensors",
648
- "model.layers.6.self_attn.k_proj.biases": "model.safetensors",
649
- "model.layers.6.self_attn.k_proj.scales": "model.safetensors",
650
- "model.layers.6.self_attn.k_proj.weight": "model.safetensors",
651
- "model.layers.6.self_attn.o_proj.biases": "model.safetensors",
652
- "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
653
- "model.layers.6.self_attn.o_proj.weight": "model.safetensors",
654
- "model.layers.6.self_attn.q_proj.bias": "model.safetensors",
655
- "model.layers.6.self_attn.q_proj.biases": "model.safetensors",
656
- "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
657
- "model.layers.6.self_attn.q_proj.weight": "model.safetensors",
658
- "model.layers.6.self_attn.v_proj.bias": "model.safetensors",
659
- "model.layers.6.self_attn.v_proj.biases": "model.safetensors",
660
- "model.layers.6.self_attn.v_proj.scales": "model.safetensors",
661
- "model.layers.6.self_attn.v_proj.weight": "model.safetensors",
662
- "model.layers.7.input_layernorm.weight": "model.safetensors",
663
- "model.layers.7.mlp.down_proj.biases": "model.safetensors",
664
- "model.layers.7.mlp.down_proj.scales": "model.safetensors",
665
- "model.layers.7.mlp.down_proj.weight": "model.safetensors",
666
- "model.layers.7.mlp.gate_proj.biases": "model.safetensors",
667
- "model.layers.7.mlp.gate_proj.scales": "model.safetensors",
668
- "model.layers.7.mlp.gate_proj.weight": "model.safetensors",
669
- "model.layers.7.mlp.up_proj.biases": "model.safetensors",
670
- "model.layers.7.mlp.up_proj.scales": "model.safetensors",
671
- "model.layers.7.mlp.up_proj.weight": "model.safetensors",
672
- "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
673
- "model.layers.7.self_attn.k_proj.bias": "model.safetensors",
674
- "model.layers.7.self_attn.k_proj.biases": "model.safetensors",
675
- "model.layers.7.self_attn.k_proj.scales": "model.safetensors",
676
- "model.layers.7.self_attn.k_proj.weight": "model.safetensors",
677
- "model.layers.7.self_attn.o_proj.biases": "model.safetensors",
678
- "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
679
- "model.layers.7.self_attn.o_proj.weight": "model.safetensors",
680
- "model.layers.7.self_attn.q_proj.bias": "model.safetensors",
681
- "model.layers.7.self_attn.q_proj.biases": "model.safetensors",
682
- "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
683
- "model.layers.7.self_attn.q_proj.weight": "model.safetensors",
684
- "model.layers.7.self_attn.v_proj.bias": "model.safetensors",
685
- "model.layers.7.self_attn.v_proj.biases": "model.safetensors",
686
- "model.layers.7.self_attn.v_proj.scales": "model.safetensors",
687
- "model.layers.7.self_attn.v_proj.weight": "model.safetensors",
688
- "model.layers.8.input_layernorm.weight": "model.safetensors",
689
- "model.layers.8.mlp.down_proj.biases": "model.safetensors",
690
- "model.layers.8.mlp.down_proj.scales": "model.safetensors",
691
- "model.layers.8.mlp.down_proj.weight": "model.safetensors",
692
- "model.layers.8.mlp.gate_proj.biases": "model.safetensors",
693
- "model.layers.8.mlp.gate_proj.scales": "model.safetensors",
694
- "model.layers.8.mlp.gate_proj.weight": "model.safetensors",
695
- "model.layers.8.mlp.up_proj.biases": "model.safetensors",
696
- "model.layers.8.mlp.up_proj.scales": "model.safetensors",
697
- "model.layers.8.mlp.up_proj.weight": "model.safetensors",
698
- "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
699
- "model.layers.8.self_attn.k_proj.bias": "model.safetensors",
700
- "model.layers.8.self_attn.k_proj.biases": "model.safetensors",
701
- "model.layers.8.self_attn.k_proj.scales": "model.safetensors",
702
- "model.layers.8.self_attn.k_proj.weight": "model.safetensors",
703
- "model.layers.8.self_attn.o_proj.biases": "model.safetensors",
704
- "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
705
- "model.layers.8.self_attn.o_proj.weight": "model.safetensors",
706
- "model.layers.8.self_attn.q_proj.bias": "model.safetensors",
707
- "model.layers.8.self_attn.q_proj.biases": "model.safetensors",
708
- "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
709
- "model.layers.8.self_attn.q_proj.weight": "model.safetensors",
710
- "model.layers.8.self_attn.v_proj.bias": "model.safetensors",
711
- "model.layers.8.self_attn.v_proj.biases": "model.safetensors",
712
- "model.layers.8.self_attn.v_proj.scales": "model.safetensors",
713
- "model.layers.8.self_attn.v_proj.weight": "model.safetensors",
714
- "model.layers.9.input_layernorm.weight": "model.safetensors",
715
- "model.layers.9.mlp.down_proj.biases": "model.safetensors",
716
- "model.layers.9.mlp.down_proj.scales": "model.safetensors",
717
- "model.layers.9.mlp.down_proj.weight": "model.safetensors",
718
- "model.layers.9.mlp.gate_proj.biases": "model.safetensors",
719
- "model.layers.9.mlp.gate_proj.scales": "model.safetensors",
720
- "model.layers.9.mlp.gate_proj.weight": "model.safetensors",
721
- "model.layers.9.mlp.up_proj.biases": "model.safetensors",
722
- "model.layers.9.mlp.up_proj.scales": "model.safetensors",
723
- "model.layers.9.mlp.up_proj.weight": "model.safetensors",
724
- "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
725
- "model.layers.9.self_attn.k_proj.bias": "model.safetensors",
726
- "model.layers.9.self_attn.k_proj.biases": "model.safetensors",
727
- "model.layers.9.self_attn.k_proj.scales": "model.safetensors",
728
- "model.layers.9.self_attn.k_proj.weight": "model.safetensors",
729
- "model.layers.9.self_attn.o_proj.biases": "model.safetensors",
730
- "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
731
- "model.layers.9.self_attn.o_proj.weight": "model.safetensors",
732
- "model.layers.9.self_attn.q_proj.bias": "model.safetensors",
733
- "model.layers.9.self_attn.q_proj.biases": "model.safetensors",
734
- "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
735
- "model.layers.9.self_attn.q_proj.weight": "model.safetensors",
736
- "model.layers.9.self_attn.v_proj.bias": "model.safetensors",
737
- "model.layers.9.self_attn.v_proj.biases": "model.safetensors",
738
- "model.layers.9.self_attn.v_proj.scales": "model.safetensors",
739
- "model.layers.9.self_attn.v_proj.weight": "model.safetensors",
740
- "model.norm.weight": "model.safetensors"
741
- }
742
- }
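
Note on the layout above: the .weight / .scales / .biases triplets in this weight map are the on-disk form of MLX's group-wise affine quantization (the norm layers keep a single unquantized .weight, and attention q/k/v also keep their original .bias vectors). A minimal round-trip sketch, assuming only that the mlx package is installed; the shape is illustrative, and group_size=64 / bits=4 match the 4bit config shown further down (the mixed quants vary the bit width per layer):

import mlx.core as mx

# Each weight row is split into groups of `group_size` elements; every
# group is stored as packed low-bit integers plus one scale and one bias.
# That is why every projection above contributes .weight, .scales and
# .biases entries to the index.
w = mx.random.normal((128, 256))
w_q, scales, biases = mx.quantize(w, group_size=64, bits=4)

# Approximate reconstruction of the original weights.
w_hat = mx.dequantize(w_q, scales, biases, group_size=64, bits=4)
print(mx.max(mx.abs(w - w_hat)))  # small quantization error
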
DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
- {
- "bos_token": {
- "content": "<|begin▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "eos_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "pad_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- }
- }
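
These BOS/EOS/pad definitions are what tokenizer loaders read back from special_tokens_map.json; note that the pad token is deliberately mapped to the EOS token. A quick check, assuming the transformers package and a local copy of one of the quant folders (the path is illustrative):

from transformers import AutoTokenizer

# Hypothetical local path to one of the quantized model folders.
tok = AutoTokenizer.from_pretrained("DeepSeek-R1-Distill-Qwen-7B-4bit")

print(tok.bos_token)  # <|begin▁of▁sentence|>
print(tok.eos_token)  # <|end▁of▁sentence|>
print(tok.pad_token)  # same as EOS, as declared above
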
DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/tokenizer.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
- size 11422778
DeepSeek-R1-Distill-Qwen-7B-4,8_mixed/tokenizer_config.json DELETED
@@ -1,195 +0,0 @@
- {
- "add_bos_token": true,
- "add_eos_token": false,
- "add_prefix_space": null,
- "added_tokens_decoder": {
- "151643": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151644": {
- "content": "<|User|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151645": {
- "content": "<|Assistant|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151646": {
- "content": "<|begin▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151647": {
- "content": "<|EOT|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151648": {
- "content": "<think>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151649": {
- "content": "</think>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151650": {
- "content": "<|quad_start|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151651": {
- "content": "<|quad_end|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151652": {
- "content": "<|vision_start|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151653": {
- "content": "<|vision_end|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151654": {
- "content": "<|vision_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151655": {
- "content": "<|image_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151656": {
- "content": "<|video_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151657": {
- "content": "<tool_call>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151658": {
- "content": "</tool_call>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151659": {
- "content": "<|fim_prefix|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151660": {
- "content": "<|fim_middle|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151661": {
- "content": "<|fim_suffix|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151662": {
- "content": "<|fim_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151663": {
- "content": "<|repo_name|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151664": {
- "content": "<|file_sep|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- }
- },
- "bos_token": "<|begin▁of▁sentence|>",
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
- "clean_up_tokenization_spaces": false,
- "eos_token": "<|end▁of▁sentence|>",
- "extra_special_tokens": {},
- "legacy": true,
- "model_max_length": 16384,
- "pad_token": "<|end▁of▁sentence|>",
- "sp_model_kwargs": {},
- "tokenizer_class": "LlamaTokenizerFast",
- "unk_token": null,
- "use_default_system_prompt": false
- }
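
The chat_template above is the part worth reading: it concatenates <|User|> / <|Assistant|> turns, strips everything up to </think> from earlier assistant messages, and ends the prompt with <|Assistant|><think>\n when a generation prompt is requested, so the model starts decoding inside its reasoning block. A minimal rendering sketch, assuming the transformers package and a local copy of the folder (path illustrative):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("DeepSeek-R1-Distill-Qwen-7B-4bit")
messages = [{"role": "user", "content": "What is 2 + 2?"}]

# Renders the Jinja template from tokenizer_config.json; the output ends
# with "<|Assistant|><think>\n" so generation begins inside <think>.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
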
DeepSeek-R1-Distill-Qwen-7B-4bit/config.json DELETED
@@ -1,36 +0,0 @@
- {
- "architectures": [
- "Qwen2ForCausalLM"
- ],
- "attention_dropout": 0.0,
- "bos_token_id": 151643,
- "eos_token_id": 151643,
- "hidden_act": "silu",
- "hidden_size": 3584,
- "initializer_range": 0.02,
- "intermediate_size": 18944,
- "max_position_embeddings": 131072,
- "max_window_layers": 28,
- "model_type": "qwen2",
- "num_attention_heads": 28,
- "num_hidden_layers": 28,
- "num_key_value_heads": 4,
- "quantization": {
- "group_size": 64,
- "bits": 4
- },
- "quantization_config": {
- "group_size": 64,
- "bits": 4
- },
- "rms_norm_eps": 1e-06,
- "rope_theta": 10000,
- "sliding_window": 4096,
- "tie_word_embeddings": false,
- "torch_dtype": "bfloat16",
- "transformers_version": "4.44.0",
- "use_cache": true,
- "use_mrope": false,
- "use_sliding_window": false,
- "vocab_size": 152064
- }
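
The duplicated "quantization" / "quantization_config" blocks (group_size 64, bits 4) are what mlx-lm reads to rebuild the 4-bit layers before loading the safetensors shard. A minimal load-and-generate sketch, assuming the mlx-lm package and a local copy of this folder (path illustrative):

from mlx_lm import load, generate

# load() parses config.json, sees group_size=64 / bits=4, quantizes the
# module tree accordingly, then loads model.safetensors into it.
model, tokenizer = load("DeepSeek-R1-Distill-Qwen-7B-4bit")

print(generate(model, tokenizer, prompt="Hello", max_tokens=32))
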
DeepSeek-R1-Distill-Qwen-7B-4bit/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:6f9acc3ffe5a5610b6df2902d0a2262c728b1abdb1d7eafa563523122d9af60f
- size 4284346187
DeepSeek-R1-Distill-Qwen-7B-4bit/model.safetensors.index.json DELETED
@@ -1,742 +0,0 @@
- {
- "metadata": {
- "total_size": 4284263424
- },
- "weight_map": {
- "lm_head.biases": "model.safetensors",
- "lm_head.scales": "model.safetensors",
- "lm_head.weight": "model.safetensors",
- "model.embed_tokens.biases": "model.safetensors",
- "model.embed_tokens.scales": "model.safetensors",
- "model.embed_tokens.weight": "model.safetensors",
- "model.layers.0.input_layernorm.weight": "model.safetensors",
- "model.layers.0.mlp.down_proj.biases": "model.safetensors",
- "model.layers.0.mlp.down_proj.scales": "model.safetensors",
- "model.layers.0.mlp.down_proj.weight": "model.safetensors",
- "model.layers.0.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.0.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.0.mlp.up_proj.biases": "model.safetensors",
- "model.layers.0.mlp.up_proj.scales": "model.safetensors",
- "model.layers.0.mlp.up_proj.weight": "model.safetensors",
- "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.0.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.0.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.0.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.0.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.0.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.1.input_layernorm.weight": "model.safetensors",
- "model.layers.1.mlp.down_proj.biases": "model.safetensors",
- "model.layers.1.mlp.down_proj.scales": "model.safetensors",
- "model.layers.1.mlp.down_proj.weight": "model.safetensors",
- "model.layers.1.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.1.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.1.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.1.mlp.up_proj.biases": "model.safetensors",
- "model.layers.1.mlp.up_proj.scales": "model.safetensors",
- "model.layers.1.mlp.up_proj.weight": "model.safetensors",
- "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.1.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.1.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.1.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.1.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.1.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.10.input_layernorm.weight": "model.safetensors",
- "model.layers.10.mlp.down_proj.biases": "model.safetensors",
- "model.layers.10.mlp.down_proj.scales": "model.safetensors",
- "model.layers.10.mlp.down_proj.weight": "model.safetensors",
- "model.layers.10.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.10.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.10.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.10.mlp.up_proj.biases": "model.safetensors",
- "model.layers.10.mlp.up_proj.scales": "model.safetensors",
- "model.layers.10.mlp.up_proj.weight": "model.safetensors",
- "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.10.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.10.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.10.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.10.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.10.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.11.input_layernorm.weight": "model.safetensors",
- "model.layers.11.mlp.down_proj.biases": "model.safetensors",
- "model.layers.11.mlp.down_proj.scales": "model.safetensors",
- "model.layers.11.mlp.down_proj.weight": "model.safetensors",
- "model.layers.11.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.11.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.11.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.11.mlp.up_proj.biases": "model.safetensors",
- "model.layers.11.mlp.up_proj.scales": "model.safetensors",
- "model.layers.11.mlp.up_proj.weight": "model.safetensors",
- "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.11.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.11.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.11.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.11.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.11.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.12.input_layernorm.weight": "model.safetensors",
- "model.layers.12.mlp.down_proj.biases": "model.safetensors",
- "model.layers.12.mlp.down_proj.scales": "model.safetensors",
- "model.layers.12.mlp.down_proj.weight": "model.safetensors",
- "model.layers.12.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.12.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.12.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.12.mlp.up_proj.biases": "model.safetensors",
- "model.layers.12.mlp.up_proj.scales": "model.safetensors",
- "model.layers.12.mlp.up_proj.weight": "model.safetensors",
- "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.12.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.12.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.12.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.12.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.12.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.13.input_layernorm.weight": "model.safetensors",
- "model.layers.13.mlp.down_proj.biases": "model.safetensors",
- "model.layers.13.mlp.down_proj.scales": "model.safetensors",
- "model.layers.13.mlp.down_proj.weight": "model.safetensors",
- "model.layers.13.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.13.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.13.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.13.mlp.up_proj.biases": "model.safetensors",
- "model.layers.13.mlp.up_proj.scales": "model.safetensors",
- "model.layers.13.mlp.up_proj.weight": "model.safetensors",
- "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.13.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.13.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.13.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.13.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.13.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.13.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.13.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.13.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.13.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.14.input_layernorm.weight": "model.safetensors",
- "model.layers.14.mlp.down_proj.biases": "model.safetensors",
- "model.layers.14.mlp.down_proj.scales": "model.safetensors",
- "model.layers.14.mlp.down_proj.weight": "model.safetensors",
- "model.layers.14.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.14.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.14.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.14.mlp.up_proj.biases": "model.safetensors",
- "model.layers.14.mlp.up_proj.scales": "model.safetensors",
- "model.layers.14.mlp.up_proj.weight": "model.safetensors",
- "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.14.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.14.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.k_proj.weight": "model.safetensors",
- "model.layers.14.self_attn.o_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.o_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.o_proj.weight": "model.safetensors",
- "model.layers.14.self_attn.q_proj.bias": "model.safetensors",
- "model.layers.14.self_attn.q_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.q_proj.weight": "model.safetensors",
- "model.layers.14.self_attn.v_proj.bias": "model.safetensors",
- "model.layers.14.self_attn.v_proj.biases": "model.safetensors",
- "model.layers.14.self_attn.v_proj.scales": "model.safetensors",
- "model.layers.14.self_attn.v_proj.weight": "model.safetensors",
- "model.layers.15.input_layernorm.weight": "model.safetensors",
- "model.layers.15.mlp.down_proj.biases": "model.safetensors",
- "model.layers.15.mlp.down_proj.scales": "model.safetensors",
- "model.layers.15.mlp.down_proj.weight": "model.safetensors",
- "model.layers.15.mlp.gate_proj.biases": "model.safetensors",
- "model.layers.15.mlp.gate_proj.scales": "model.safetensors",
- "model.layers.15.mlp.gate_proj.weight": "model.safetensors",
- "model.layers.15.mlp.up_proj.biases": "model.safetensors",
- "model.layers.15.mlp.up_proj.scales": "model.safetensors",
- "model.layers.15.mlp.up_proj.weight": "model.safetensors",
- "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
- "model.layers.15.self_attn.k_proj.bias": "model.safetensors",
- "model.layers.15.self_attn.k_proj.biases": "model.safetensors",
- "model.layers.15.self_attn.k_proj.scales": "model.safetensors",
- "model.layers.15.self_attn.k_proj.weight": "model.safetensors",
209
- "model.layers.15.self_attn.o_proj.biases": "model.safetensors",
210
- "model.layers.15.self_attn.o_proj.scales": "model.safetensors",
211
- "model.layers.15.self_attn.o_proj.weight": "model.safetensors",
212
- "model.layers.15.self_attn.q_proj.bias": "model.safetensors",
213
- "model.layers.15.self_attn.q_proj.biases": "model.safetensors",
214
- "model.layers.15.self_attn.q_proj.scales": "model.safetensors",
215
- "model.layers.15.self_attn.q_proj.weight": "model.safetensors",
216
- "model.layers.15.self_attn.v_proj.bias": "model.safetensors",
217
- "model.layers.15.self_attn.v_proj.biases": "model.safetensors",
218
- "model.layers.15.self_attn.v_proj.scales": "model.safetensors",
219
- "model.layers.15.self_attn.v_proj.weight": "model.safetensors",
220
- "model.layers.16.input_layernorm.weight": "model.safetensors",
221
- "model.layers.16.mlp.down_proj.biases": "model.safetensors",
222
- "model.layers.16.mlp.down_proj.scales": "model.safetensors",
223
- "model.layers.16.mlp.down_proj.weight": "model.safetensors",
224
- "model.layers.16.mlp.gate_proj.biases": "model.safetensors",
225
- "model.layers.16.mlp.gate_proj.scales": "model.safetensors",
226
- "model.layers.16.mlp.gate_proj.weight": "model.safetensors",
227
- "model.layers.16.mlp.up_proj.biases": "model.safetensors",
228
- "model.layers.16.mlp.up_proj.scales": "model.safetensors",
229
- "model.layers.16.mlp.up_proj.weight": "model.safetensors",
230
- "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
231
- "model.layers.16.self_attn.k_proj.bias": "model.safetensors",
232
- "model.layers.16.self_attn.k_proj.biases": "model.safetensors",
233
- "model.layers.16.self_attn.k_proj.scales": "model.safetensors",
234
- "model.layers.16.self_attn.k_proj.weight": "model.safetensors",
235
- "model.layers.16.self_attn.o_proj.biases": "model.safetensors",
236
- "model.layers.16.self_attn.o_proj.scales": "model.safetensors",
237
- "model.layers.16.self_attn.o_proj.weight": "model.safetensors",
238
- "model.layers.16.self_attn.q_proj.bias": "model.safetensors",
239
- "model.layers.16.self_attn.q_proj.biases": "model.safetensors",
240
- "model.layers.16.self_attn.q_proj.scales": "model.safetensors",
241
- "model.layers.16.self_attn.q_proj.weight": "model.safetensors",
242
- "model.layers.16.self_attn.v_proj.bias": "model.safetensors",
243
- "model.layers.16.self_attn.v_proj.biases": "model.safetensors",
244
- "model.layers.16.self_attn.v_proj.scales": "model.safetensors",
245
- "model.layers.16.self_attn.v_proj.weight": "model.safetensors",
246
- "model.layers.17.input_layernorm.weight": "model.safetensors",
247
- "model.layers.17.mlp.down_proj.biases": "model.safetensors",
248
- "model.layers.17.mlp.down_proj.scales": "model.safetensors",
249
- "model.layers.17.mlp.down_proj.weight": "model.safetensors",
250
- "model.layers.17.mlp.gate_proj.biases": "model.safetensors",
251
- "model.layers.17.mlp.gate_proj.scales": "model.safetensors",
252
- "model.layers.17.mlp.gate_proj.weight": "model.safetensors",
253
- "model.layers.17.mlp.up_proj.biases": "model.safetensors",
254
- "model.layers.17.mlp.up_proj.scales": "model.safetensors",
255
- "model.layers.17.mlp.up_proj.weight": "model.safetensors",
256
- "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
257
- "model.layers.17.self_attn.k_proj.bias": "model.safetensors",
258
- "model.layers.17.self_attn.k_proj.biases": "model.safetensors",
259
- "model.layers.17.self_attn.k_proj.scales": "model.safetensors",
260
- "model.layers.17.self_attn.k_proj.weight": "model.safetensors",
261
- "model.layers.17.self_attn.o_proj.biases": "model.safetensors",
262
- "model.layers.17.self_attn.o_proj.scales": "model.safetensors",
263
- "model.layers.17.self_attn.o_proj.weight": "model.safetensors",
264
- "model.layers.17.self_attn.q_proj.bias": "model.safetensors",
265
- "model.layers.17.self_attn.q_proj.biases": "model.safetensors",
266
- "model.layers.17.self_attn.q_proj.scales": "model.safetensors",
267
- "model.layers.17.self_attn.q_proj.weight": "model.safetensors",
268
- "model.layers.17.self_attn.v_proj.bias": "model.safetensors",
269
- "model.layers.17.self_attn.v_proj.biases": "model.safetensors",
270
- "model.layers.17.self_attn.v_proj.scales": "model.safetensors",
271
- "model.layers.17.self_attn.v_proj.weight": "model.safetensors",
272
- "model.layers.18.input_layernorm.weight": "model.safetensors",
273
- "model.layers.18.mlp.down_proj.biases": "model.safetensors",
274
- "model.layers.18.mlp.down_proj.scales": "model.safetensors",
275
- "model.layers.18.mlp.down_proj.weight": "model.safetensors",
276
- "model.layers.18.mlp.gate_proj.biases": "model.safetensors",
277
- "model.layers.18.mlp.gate_proj.scales": "model.safetensors",
278
- "model.layers.18.mlp.gate_proj.weight": "model.safetensors",
279
- "model.layers.18.mlp.up_proj.biases": "model.safetensors",
280
- "model.layers.18.mlp.up_proj.scales": "model.safetensors",
281
- "model.layers.18.mlp.up_proj.weight": "model.safetensors",
282
- "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
283
- "model.layers.18.self_attn.k_proj.bias": "model.safetensors",
284
- "model.layers.18.self_attn.k_proj.biases": "model.safetensors",
285
- "model.layers.18.self_attn.k_proj.scales": "model.safetensors",
286
- "model.layers.18.self_attn.k_proj.weight": "model.safetensors",
287
- "model.layers.18.self_attn.o_proj.biases": "model.safetensors",
288
- "model.layers.18.self_attn.o_proj.scales": "model.safetensors",
289
- "model.layers.18.self_attn.o_proj.weight": "model.safetensors",
290
- "model.layers.18.self_attn.q_proj.bias": "model.safetensors",
291
- "model.layers.18.self_attn.q_proj.biases": "model.safetensors",
292
- "model.layers.18.self_attn.q_proj.scales": "model.safetensors",
293
- "model.layers.18.self_attn.q_proj.weight": "model.safetensors",
294
- "model.layers.18.self_attn.v_proj.bias": "model.safetensors",
295
- "model.layers.18.self_attn.v_proj.biases": "model.safetensors",
296
- "model.layers.18.self_attn.v_proj.scales": "model.safetensors",
297
- "model.layers.18.self_attn.v_proj.weight": "model.safetensors",
298
- "model.layers.19.input_layernorm.weight": "model.safetensors",
299
- "model.layers.19.mlp.down_proj.biases": "model.safetensors",
300
- "model.layers.19.mlp.down_proj.scales": "model.safetensors",
301
- "model.layers.19.mlp.down_proj.weight": "model.safetensors",
302
- "model.layers.19.mlp.gate_proj.biases": "model.safetensors",
303
- "model.layers.19.mlp.gate_proj.scales": "model.safetensors",
304
- "model.layers.19.mlp.gate_proj.weight": "model.safetensors",
305
- "model.layers.19.mlp.up_proj.biases": "model.safetensors",
306
- "model.layers.19.mlp.up_proj.scales": "model.safetensors",
307
- "model.layers.19.mlp.up_proj.weight": "model.safetensors",
308
- "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
309
- "model.layers.19.self_attn.k_proj.bias": "model.safetensors",
310
- "model.layers.19.self_attn.k_proj.biases": "model.safetensors",
311
- "model.layers.19.self_attn.k_proj.scales": "model.safetensors",
312
- "model.layers.19.self_attn.k_proj.weight": "model.safetensors",
313
- "model.layers.19.self_attn.o_proj.biases": "model.safetensors",
314
- "model.layers.19.self_attn.o_proj.scales": "model.safetensors",
315
- "model.layers.19.self_attn.o_proj.weight": "model.safetensors",
316
- "model.layers.19.self_attn.q_proj.bias": "model.safetensors",
317
- "model.layers.19.self_attn.q_proj.biases": "model.safetensors",
318
- "model.layers.19.self_attn.q_proj.scales": "model.safetensors",
319
- "model.layers.19.self_attn.q_proj.weight": "model.safetensors",
320
- "model.layers.19.self_attn.v_proj.bias": "model.safetensors",
321
- "model.layers.19.self_attn.v_proj.biases": "model.safetensors",
322
- "model.layers.19.self_attn.v_proj.scales": "model.safetensors",
323
- "model.layers.19.self_attn.v_proj.weight": "model.safetensors",
324
- "model.layers.2.input_layernorm.weight": "model.safetensors",
325
- "model.layers.2.mlp.down_proj.biases": "model.safetensors",
326
- "model.layers.2.mlp.down_proj.scales": "model.safetensors",
327
- "model.layers.2.mlp.down_proj.weight": "model.safetensors",
328
- "model.layers.2.mlp.gate_proj.biases": "model.safetensors",
329
- "model.layers.2.mlp.gate_proj.scales": "model.safetensors",
330
- "model.layers.2.mlp.gate_proj.weight": "model.safetensors",
331
- "model.layers.2.mlp.up_proj.biases": "model.safetensors",
332
- "model.layers.2.mlp.up_proj.scales": "model.safetensors",
333
- "model.layers.2.mlp.up_proj.weight": "model.safetensors",
334
- "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
335
- "model.layers.2.self_attn.k_proj.bias": "model.safetensors",
336
- "model.layers.2.self_attn.k_proj.biases": "model.safetensors",
337
- "model.layers.2.self_attn.k_proj.scales": "model.safetensors",
338
- "model.layers.2.self_attn.k_proj.weight": "model.safetensors",
339
- "model.layers.2.self_attn.o_proj.biases": "model.safetensors",
340
- "model.layers.2.self_attn.o_proj.scales": "model.safetensors",
341
- "model.layers.2.self_attn.o_proj.weight": "model.safetensors",
342
- "model.layers.2.self_attn.q_proj.bias": "model.safetensors",
343
- "model.layers.2.self_attn.q_proj.biases": "model.safetensors",
344
- "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
345
- "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
346
- "model.layers.2.self_attn.v_proj.bias": "model.safetensors",
347
- "model.layers.2.self_attn.v_proj.biases": "model.safetensors",
348
- "model.layers.2.self_attn.v_proj.scales": "model.safetensors",
349
- "model.layers.2.self_attn.v_proj.weight": "model.safetensors",
350
- "model.layers.20.input_layernorm.weight": "model.safetensors",
351
- "model.layers.20.mlp.down_proj.biases": "model.safetensors",
352
- "model.layers.20.mlp.down_proj.scales": "model.safetensors",
353
- "model.layers.20.mlp.down_proj.weight": "model.safetensors",
354
- "model.layers.20.mlp.gate_proj.biases": "model.safetensors",
355
- "model.layers.20.mlp.gate_proj.scales": "model.safetensors",
356
- "model.layers.20.mlp.gate_proj.weight": "model.safetensors",
357
- "model.layers.20.mlp.up_proj.biases": "model.safetensors",
358
- "model.layers.20.mlp.up_proj.scales": "model.safetensors",
359
- "model.layers.20.mlp.up_proj.weight": "model.safetensors",
360
- "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
361
- "model.layers.20.self_attn.k_proj.bias": "model.safetensors",
362
- "model.layers.20.self_attn.k_proj.biases": "model.safetensors",
363
- "model.layers.20.self_attn.k_proj.scales": "model.safetensors",
364
- "model.layers.20.self_attn.k_proj.weight": "model.safetensors",
365
- "model.layers.20.self_attn.o_proj.biases": "model.safetensors",
366
- "model.layers.20.self_attn.o_proj.scales": "model.safetensors",
367
- "model.layers.20.self_attn.o_proj.weight": "model.safetensors",
368
- "model.layers.20.self_attn.q_proj.bias": "model.safetensors",
369
- "model.layers.20.self_attn.q_proj.biases": "model.safetensors",
370
- "model.layers.20.self_attn.q_proj.scales": "model.safetensors",
371
- "model.layers.20.self_attn.q_proj.weight": "model.safetensors",
372
- "model.layers.20.self_attn.v_proj.bias": "model.safetensors",
373
- "model.layers.20.self_attn.v_proj.biases": "model.safetensors",
374
- "model.layers.20.self_attn.v_proj.scales": "model.safetensors",
375
- "model.layers.20.self_attn.v_proj.weight": "model.safetensors",
376
- "model.layers.21.input_layernorm.weight": "model.safetensors",
377
- "model.layers.21.mlp.down_proj.biases": "model.safetensors",
378
- "model.layers.21.mlp.down_proj.scales": "model.safetensors",
379
- "model.layers.21.mlp.down_proj.weight": "model.safetensors",
380
- "model.layers.21.mlp.gate_proj.biases": "model.safetensors",
381
- "model.layers.21.mlp.gate_proj.scales": "model.safetensors",
382
- "model.layers.21.mlp.gate_proj.weight": "model.safetensors",
383
- "model.layers.21.mlp.up_proj.biases": "model.safetensors",
384
- "model.layers.21.mlp.up_proj.scales": "model.safetensors",
385
- "model.layers.21.mlp.up_proj.weight": "model.safetensors",
386
- "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
387
- "model.layers.21.self_attn.k_proj.bias": "model.safetensors",
388
- "model.layers.21.self_attn.k_proj.biases": "model.safetensors",
389
- "model.layers.21.self_attn.k_proj.scales": "model.safetensors",
390
- "model.layers.21.self_attn.k_proj.weight": "model.safetensors",
391
- "model.layers.21.self_attn.o_proj.biases": "model.safetensors",
392
- "model.layers.21.self_attn.o_proj.scales": "model.safetensors",
393
- "model.layers.21.self_attn.o_proj.weight": "model.safetensors",
394
- "model.layers.21.self_attn.q_proj.bias": "model.safetensors",
395
- "model.layers.21.self_attn.q_proj.biases": "model.safetensors",
396
- "model.layers.21.self_attn.q_proj.scales": "model.safetensors",
397
- "model.layers.21.self_attn.q_proj.weight": "model.safetensors",
398
- "model.layers.21.self_attn.v_proj.bias": "model.safetensors",
399
- "model.layers.21.self_attn.v_proj.biases": "model.safetensors",
400
- "model.layers.21.self_attn.v_proj.scales": "model.safetensors",
401
- "model.layers.21.self_attn.v_proj.weight": "model.safetensors",
402
- "model.layers.22.input_layernorm.weight": "model.safetensors",
403
- "model.layers.22.mlp.down_proj.biases": "model.safetensors",
404
- "model.layers.22.mlp.down_proj.scales": "model.safetensors",
405
- "model.layers.22.mlp.down_proj.weight": "model.safetensors",
406
- "model.layers.22.mlp.gate_proj.biases": "model.safetensors",
407
- "model.layers.22.mlp.gate_proj.scales": "model.safetensors",
408
- "model.layers.22.mlp.gate_proj.weight": "model.safetensors",
409
- "model.layers.22.mlp.up_proj.biases": "model.safetensors",
410
- "model.layers.22.mlp.up_proj.scales": "model.safetensors",
411
- "model.layers.22.mlp.up_proj.weight": "model.safetensors",
412
- "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
413
- "model.layers.22.self_attn.k_proj.bias": "model.safetensors",
414
- "model.layers.22.self_attn.k_proj.biases": "model.safetensors",
415
- "model.layers.22.self_attn.k_proj.scales": "model.safetensors",
416
- "model.layers.22.self_attn.k_proj.weight": "model.safetensors",
417
- "model.layers.22.self_attn.o_proj.biases": "model.safetensors",
418
- "model.layers.22.self_attn.o_proj.scales": "model.safetensors",
419
- "model.layers.22.self_attn.o_proj.weight": "model.safetensors",
420
- "model.layers.22.self_attn.q_proj.bias": "model.safetensors",
421
- "model.layers.22.self_attn.q_proj.biases": "model.safetensors",
422
- "model.layers.22.self_attn.q_proj.scales": "model.safetensors",
423
- "model.layers.22.self_attn.q_proj.weight": "model.safetensors",
424
- "model.layers.22.self_attn.v_proj.bias": "model.safetensors",
425
- "model.layers.22.self_attn.v_proj.biases": "model.safetensors",
426
- "model.layers.22.self_attn.v_proj.scales": "model.safetensors",
427
- "model.layers.22.self_attn.v_proj.weight": "model.safetensors",
428
- "model.layers.23.input_layernorm.weight": "model.safetensors",
429
- "model.layers.23.mlp.down_proj.biases": "model.safetensors",
430
- "model.layers.23.mlp.down_proj.scales": "model.safetensors",
431
- "model.layers.23.mlp.down_proj.weight": "model.safetensors",
432
- "model.layers.23.mlp.gate_proj.biases": "model.safetensors",
433
- "model.layers.23.mlp.gate_proj.scales": "model.safetensors",
434
- "model.layers.23.mlp.gate_proj.weight": "model.safetensors",
435
- "model.layers.23.mlp.up_proj.biases": "model.safetensors",
436
- "model.layers.23.mlp.up_proj.scales": "model.safetensors",
437
- "model.layers.23.mlp.up_proj.weight": "model.safetensors",
438
- "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
439
- "model.layers.23.self_attn.k_proj.bias": "model.safetensors",
440
- "model.layers.23.self_attn.k_proj.biases": "model.safetensors",
441
- "model.layers.23.self_attn.k_proj.scales": "model.safetensors",
442
- "model.layers.23.self_attn.k_proj.weight": "model.safetensors",
443
- "model.layers.23.self_attn.o_proj.biases": "model.safetensors",
444
- "model.layers.23.self_attn.o_proj.scales": "model.safetensors",
445
- "model.layers.23.self_attn.o_proj.weight": "model.safetensors",
446
- "model.layers.23.self_attn.q_proj.bias": "model.safetensors",
447
- "model.layers.23.self_attn.q_proj.biases": "model.safetensors",
448
- "model.layers.23.self_attn.q_proj.scales": "model.safetensors",
449
- "model.layers.23.self_attn.q_proj.weight": "model.safetensors",
450
- "model.layers.23.self_attn.v_proj.bias": "model.safetensors",
451
- "model.layers.23.self_attn.v_proj.biases": "model.safetensors",
452
- "model.layers.23.self_attn.v_proj.scales": "model.safetensors",
453
- "model.layers.23.self_attn.v_proj.weight": "model.safetensors",
454
- "model.layers.24.input_layernorm.weight": "model.safetensors",
455
- "model.layers.24.mlp.down_proj.biases": "model.safetensors",
456
- "model.layers.24.mlp.down_proj.scales": "model.safetensors",
457
- "model.layers.24.mlp.down_proj.weight": "model.safetensors",
458
- "model.layers.24.mlp.gate_proj.biases": "model.safetensors",
459
- "model.layers.24.mlp.gate_proj.scales": "model.safetensors",
460
- "model.layers.24.mlp.gate_proj.weight": "model.safetensors",
461
- "model.layers.24.mlp.up_proj.biases": "model.safetensors",
462
- "model.layers.24.mlp.up_proj.scales": "model.safetensors",
463
- "model.layers.24.mlp.up_proj.weight": "model.safetensors",
464
- "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
465
- "model.layers.24.self_attn.k_proj.bias": "model.safetensors",
466
- "model.layers.24.self_attn.k_proj.biases": "model.safetensors",
467
- "model.layers.24.self_attn.k_proj.scales": "model.safetensors",
468
- "model.layers.24.self_attn.k_proj.weight": "model.safetensors",
469
- "model.layers.24.self_attn.o_proj.biases": "model.safetensors",
470
- "model.layers.24.self_attn.o_proj.scales": "model.safetensors",
471
- "model.layers.24.self_attn.o_proj.weight": "model.safetensors",
472
- "model.layers.24.self_attn.q_proj.bias": "model.safetensors",
473
- "model.layers.24.self_attn.q_proj.biases": "model.safetensors",
474
- "model.layers.24.self_attn.q_proj.scales": "model.safetensors",
475
- "model.layers.24.self_attn.q_proj.weight": "model.safetensors",
476
- "model.layers.24.self_attn.v_proj.bias": "model.safetensors",
477
- "model.layers.24.self_attn.v_proj.biases": "model.safetensors",
478
- "model.layers.24.self_attn.v_proj.scales": "model.safetensors",
479
- "model.layers.24.self_attn.v_proj.weight": "model.safetensors",
480
- "model.layers.25.input_layernorm.weight": "model.safetensors",
481
- "model.layers.25.mlp.down_proj.biases": "model.safetensors",
482
- "model.layers.25.mlp.down_proj.scales": "model.safetensors",
483
- "model.layers.25.mlp.down_proj.weight": "model.safetensors",
484
- "model.layers.25.mlp.gate_proj.biases": "model.safetensors",
485
- "model.layers.25.mlp.gate_proj.scales": "model.safetensors",
486
- "model.layers.25.mlp.gate_proj.weight": "model.safetensors",
487
- "model.layers.25.mlp.up_proj.biases": "model.safetensors",
488
- "model.layers.25.mlp.up_proj.scales": "model.safetensors",
489
- "model.layers.25.mlp.up_proj.weight": "model.safetensors",
490
- "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
491
- "model.layers.25.self_attn.k_proj.bias": "model.safetensors",
492
- "model.layers.25.self_attn.k_proj.biases": "model.safetensors",
493
- "model.layers.25.self_attn.k_proj.scales": "model.safetensors",
494
- "model.layers.25.self_attn.k_proj.weight": "model.safetensors",
495
- "model.layers.25.self_attn.o_proj.biases": "model.safetensors",
496
- "model.layers.25.self_attn.o_proj.scales": "model.safetensors",
497
- "model.layers.25.self_attn.o_proj.weight": "model.safetensors",
498
- "model.layers.25.self_attn.q_proj.bias": "model.safetensors",
499
- "model.layers.25.self_attn.q_proj.biases": "model.safetensors",
500
- "model.layers.25.self_attn.q_proj.scales": "model.safetensors",
501
- "model.layers.25.self_attn.q_proj.weight": "model.safetensors",
502
- "model.layers.25.self_attn.v_proj.bias": "model.safetensors",
503
- "model.layers.25.self_attn.v_proj.biases": "model.safetensors",
504
- "model.layers.25.self_attn.v_proj.scales": "model.safetensors",
505
- "model.layers.25.self_attn.v_proj.weight": "model.safetensors",
506
- "model.layers.26.input_layernorm.weight": "model.safetensors",
507
- "model.layers.26.mlp.down_proj.biases": "model.safetensors",
508
- "model.layers.26.mlp.down_proj.scales": "model.safetensors",
509
- "model.layers.26.mlp.down_proj.weight": "model.safetensors",
510
- "model.layers.26.mlp.gate_proj.biases": "model.safetensors",
511
- "model.layers.26.mlp.gate_proj.scales": "model.safetensors",
512
- "model.layers.26.mlp.gate_proj.weight": "model.safetensors",
513
- "model.layers.26.mlp.up_proj.biases": "model.safetensors",
514
- "model.layers.26.mlp.up_proj.scales": "model.safetensors",
515
- "model.layers.26.mlp.up_proj.weight": "model.safetensors",
516
- "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
517
- "model.layers.26.self_attn.k_proj.bias": "model.safetensors",
518
- "model.layers.26.self_attn.k_proj.biases": "model.safetensors",
519
- "model.layers.26.self_attn.k_proj.scales": "model.safetensors",
520
- "model.layers.26.self_attn.k_proj.weight": "model.safetensors",
521
- "model.layers.26.self_attn.o_proj.biases": "model.safetensors",
522
- "model.layers.26.self_attn.o_proj.scales": "model.safetensors",
523
- "model.layers.26.self_attn.o_proj.weight": "model.safetensors",
524
- "model.layers.26.self_attn.q_proj.bias": "model.safetensors",
525
- "model.layers.26.self_attn.q_proj.biases": "model.safetensors",
526
- "model.layers.26.self_attn.q_proj.scales": "model.safetensors",
527
- "model.layers.26.self_attn.q_proj.weight": "model.safetensors",
528
- "model.layers.26.self_attn.v_proj.bias": "model.safetensors",
529
- "model.layers.26.self_attn.v_proj.biases": "model.safetensors",
530
- "model.layers.26.self_attn.v_proj.scales": "model.safetensors",
531
- "model.layers.26.self_attn.v_proj.weight": "model.safetensors",
532
- "model.layers.27.input_layernorm.weight": "model.safetensors",
533
- "model.layers.27.mlp.down_proj.biases": "model.safetensors",
534
- "model.layers.27.mlp.down_proj.scales": "model.safetensors",
535
- "model.layers.27.mlp.down_proj.weight": "model.safetensors",
536
- "model.layers.27.mlp.gate_proj.biases": "model.safetensors",
537
- "model.layers.27.mlp.gate_proj.scales": "model.safetensors",
538
- "model.layers.27.mlp.gate_proj.weight": "model.safetensors",
539
- "model.layers.27.mlp.up_proj.biases": "model.safetensors",
540
- "model.layers.27.mlp.up_proj.scales": "model.safetensors",
541
- "model.layers.27.mlp.up_proj.weight": "model.safetensors",
542
- "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
543
- "model.layers.27.self_attn.k_proj.bias": "model.safetensors",
544
- "model.layers.27.self_attn.k_proj.biases": "model.safetensors",
545
- "model.layers.27.self_attn.k_proj.scales": "model.safetensors",
546
- "model.layers.27.self_attn.k_proj.weight": "model.safetensors",
547
- "model.layers.27.self_attn.o_proj.biases": "model.safetensors",
548
- "model.layers.27.self_attn.o_proj.scales": "model.safetensors",
549
- "model.layers.27.self_attn.o_proj.weight": "model.safetensors",
550
- "model.layers.27.self_attn.q_proj.bias": "model.safetensors",
551
- "model.layers.27.self_attn.q_proj.biases": "model.safetensors",
552
- "model.layers.27.self_attn.q_proj.scales": "model.safetensors",
553
- "model.layers.27.self_attn.q_proj.weight": "model.safetensors",
554
- "model.layers.27.self_attn.v_proj.bias": "model.safetensors",
555
- "model.layers.27.self_attn.v_proj.biases": "model.safetensors",
556
- "model.layers.27.self_attn.v_proj.scales": "model.safetensors",
557
- "model.layers.27.self_attn.v_proj.weight": "model.safetensors",
558
- "model.layers.3.input_layernorm.weight": "model.safetensors",
559
- "model.layers.3.mlp.down_proj.biases": "model.safetensors",
560
- "model.layers.3.mlp.down_proj.scales": "model.safetensors",
561
- "model.layers.3.mlp.down_proj.weight": "model.safetensors",
562
- "model.layers.3.mlp.gate_proj.biases": "model.safetensors",
563
- "model.layers.3.mlp.gate_proj.scales": "model.safetensors",
564
- "model.layers.3.mlp.gate_proj.weight": "model.safetensors",
565
- "model.layers.3.mlp.up_proj.biases": "model.safetensors",
566
- "model.layers.3.mlp.up_proj.scales": "model.safetensors",
567
- "model.layers.3.mlp.up_proj.weight": "model.safetensors",
568
- "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
569
- "model.layers.3.self_attn.k_proj.bias": "model.safetensors",
570
- "model.layers.3.self_attn.k_proj.biases": "model.safetensors",
571
- "model.layers.3.self_attn.k_proj.scales": "model.safetensors",
572
- "model.layers.3.self_attn.k_proj.weight": "model.safetensors",
573
- "model.layers.3.self_attn.o_proj.biases": "model.safetensors",
574
- "model.layers.3.self_attn.o_proj.scales": "model.safetensors",
575
- "model.layers.3.self_attn.o_proj.weight": "model.safetensors",
576
- "model.layers.3.self_attn.q_proj.bias": "model.safetensors",
577
- "model.layers.3.self_attn.q_proj.biases": "model.safetensors",
578
- "model.layers.3.self_attn.q_proj.scales": "model.safetensors",
579
- "model.layers.3.self_attn.q_proj.weight": "model.safetensors",
580
- "model.layers.3.self_attn.v_proj.bias": "model.safetensors",
581
- "model.layers.3.self_attn.v_proj.biases": "model.safetensors",
582
- "model.layers.3.self_attn.v_proj.scales": "model.safetensors",
583
- "model.layers.3.self_attn.v_proj.weight": "model.safetensors",
584
- "model.layers.4.input_layernorm.weight": "model.safetensors",
585
- "model.layers.4.mlp.down_proj.biases": "model.safetensors",
586
- "model.layers.4.mlp.down_proj.scales": "model.safetensors",
587
- "model.layers.4.mlp.down_proj.weight": "model.safetensors",
588
- "model.layers.4.mlp.gate_proj.biases": "model.safetensors",
589
- "model.layers.4.mlp.gate_proj.scales": "model.safetensors",
590
- "model.layers.4.mlp.gate_proj.weight": "model.safetensors",
591
- "model.layers.4.mlp.up_proj.biases": "model.safetensors",
592
- "model.layers.4.mlp.up_proj.scales": "model.safetensors",
593
- "model.layers.4.mlp.up_proj.weight": "model.safetensors",
594
- "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
595
- "model.layers.4.self_attn.k_proj.bias": "model.safetensors",
596
- "model.layers.4.self_attn.k_proj.biases": "model.safetensors",
597
- "model.layers.4.self_attn.k_proj.scales": "model.safetensors",
598
- "model.layers.4.self_attn.k_proj.weight": "model.safetensors",
599
- "model.layers.4.self_attn.o_proj.biases": "model.safetensors",
600
- "model.layers.4.self_attn.o_proj.scales": "model.safetensors",
601
- "model.layers.4.self_attn.o_proj.weight": "model.safetensors",
602
- "model.layers.4.self_attn.q_proj.bias": "model.safetensors",
603
- "model.layers.4.self_attn.q_proj.biases": "model.safetensors",
604
- "model.layers.4.self_attn.q_proj.scales": "model.safetensors",
605
- "model.layers.4.self_attn.q_proj.weight": "model.safetensors",
606
- "model.layers.4.self_attn.v_proj.bias": "model.safetensors",
607
- "model.layers.4.self_attn.v_proj.biases": "model.safetensors",
608
- "model.layers.4.self_attn.v_proj.scales": "model.safetensors",
609
- "model.layers.4.self_attn.v_proj.weight": "model.safetensors",
610
- "model.layers.5.input_layernorm.weight": "model.safetensors",
611
- "model.layers.5.mlp.down_proj.biases": "model.safetensors",
612
- "model.layers.5.mlp.down_proj.scales": "model.safetensors",
613
- "model.layers.5.mlp.down_proj.weight": "model.safetensors",
614
- "model.layers.5.mlp.gate_proj.biases": "model.safetensors",
615
- "model.layers.5.mlp.gate_proj.scales": "model.safetensors",
616
- "model.layers.5.mlp.gate_proj.weight": "model.safetensors",
617
- "model.layers.5.mlp.up_proj.biases": "model.safetensors",
618
- "model.layers.5.mlp.up_proj.scales": "model.safetensors",
619
- "model.layers.5.mlp.up_proj.weight": "model.safetensors",
620
- "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
621
- "model.layers.5.self_attn.k_proj.bias": "model.safetensors",
622
- "model.layers.5.self_attn.k_proj.biases": "model.safetensors",
623
- "model.layers.5.self_attn.k_proj.scales": "model.safetensors",
624
- "model.layers.5.self_attn.k_proj.weight": "model.safetensors",
625
- "model.layers.5.self_attn.o_proj.biases": "model.safetensors",
626
- "model.layers.5.self_attn.o_proj.scales": "model.safetensors",
627
- "model.layers.5.self_attn.o_proj.weight": "model.safetensors",
628
- "model.layers.5.self_attn.q_proj.bias": "model.safetensors",
629
- "model.layers.5.self_attn.q_proj.biases": "model.safetensors",
630
- "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
631
- "model.layers.5.self_attn.q_proj.weight": "model.safetensors",
632
- "model.layers.5.self_attn.v_proj.bias": "model.safetensors",
633
- "model.layers.5.self_attn.v_proj.biases": "model.safetensors",
634
- "model.layers.5.self_attn.v_proj.scales": "model.safetensors",
635
- "model.layers.5.self_attn.v_proj.weight": "model.safetensors",
636
- "model.layers.6.input_layernorm.weight": "model.safetensors",
637
- "model.layers.6.mlp.down_proj.biases": "model.safetensors",
638
- "model.layers.6.mlp.down_proj.scales": "model.safetensors",
639
- "model.layers.6.mlp.down_proj.weight": "model.safetensors",
640
- "model.layers.6.mlp.gate_proj.biases": "model.safetensors",
641
- "model.layers.6.mlp.gate_proj.scales": "model.safetensors",
642
- "model.layers.6.mlp.gate_proj.weight": "model.safetensors",
643
- "model.layers.6.mlp.up_proj.biases": "model.safetensors",
644
- "model.layers.6.mlp.up_proj.scales": "model.safetensors",
645
- "model.layers.6.mlp.up_proj.weight": "model.safetensors",
646
- "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
647
- "model.layers.6.self_attn.k_proj.bias": "model.safetensors",
648
- "model.layers.6.self_attn.k_proj.biases": "model.safetensors",
649
- "model.layers.6.self_attn.k_proj.scales": "model.safetensors",
650
- "model.layers.6.self_attn.k_proj.weight": "model.safetensors",
651
- "model.layers.6.self_attn.o_proj.biases": "model.safetensors",
652
- "model.layers.6.self_attn.o_proj.scales": "model.safetensors",
653
- "model.layers.6.self_attn.o_proj.weight": "model.safetensors",
654
- "model.layers.6.self_attn.q_proj.bias": "model.safetensors",
655
- "model.layers.6.self_attn.q_proj.biases": "model.safetensors",
656
- "model.layers.6.self_attn.q_proj.scales": "model.safetensors",
657
- "model.layers.6.self_attn.q_proj.weight": "model.safetensors",
658
- "model.layers.6.self_attn.v_proj.bias": "model.safetensors",
659
- "model.layers.6.self_attn.v_proj.biases": "model.safetensors",
660
- "model.layers.6.self_attn.v_proj.scales": "model.safetensors",
661
- "model.layers.6.self_attn.v_proj.weight": "model.safetensors",
662
- "model.layers.7.input_layernorm.weight": "model.safetensors",
663
- "model.layers.7.mlp.down_proj.biases": "model.safetensors",
664
- "model.layers.7.mlp.down_proj.scales": "model.safetensors",
665
- "model.layers.7.mlp.down_proj.weight": "model.safetensors",
666
- "model.layers.7.mlp.gate_proj.biases": "model.safetensors",
667
- "model.layers.7.mlp.gate_proj.scales": "model.safetensors",
668
- "model.layers.7.mlp.gate_proj.weight": "model.safetensors",
669
- "model.layers.7.mlp.up_proj.biases": "model.safetensors",
670
- "model.layers.7.mlp.up_proj.scales": "model.safetensors",
671
- "model.layers.7.mlp.up_proj.weight": "model.safetensors",
672
- "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
673
- "model.layers.7.self_attn.k_proj.bias": "model.safetensors",
674
- "model.layers.7.self_attn.k_proj.biases": "model.safetensors",
675
- "model.layers.7.self_attn.k_proj.scales": "model.safetensors",
676
- "model.layers.7.self_attn.k_proj.weight": "model.safetensors",
677
- "model.layers.7.self_attn.o_proj.biases": "model.safetensors",
678
- "model.layers.7.self_attn.o_proj.scales": "model.safetensors",
679
- "model.layers.7.self_attn.o_proj.weight": "model.safetensors",
680
- "model.layers.7.self_attn.q_proj.bias": "model.safetensors",
681
- "model.layers.7.self_attn.q_proj.biases": "model.safetensors",
682
- "model.layers.7.self_attn.q_proj.scales": "model.safetensors",
683
- "model.layers.7.self_attn.q_proj.weight": "model.safetensors",
684
- "model.layers.7.self_attn.v_proj.bias": "model.safetensors",
685
- "model.layers.7.self_attn.v_proj.biases": "model.safetensors",
686
- "model.layers.7.self_attn.v_proj.scales": "model.safetensors",
687
- "model.layers.7.self_attn.v_proj.weight": "model.safetensors",
688
- "model.layers.8.input_layernorm.weight": "model.safetensors",
689
- "model.layers.8.mlp.down_proj.biases": "model.safetensors",
690
- "model.layers.8.mlp.down_proj.scales": "model.safetensors",
691
- "model.layers.8.mlp.down_proj.weight": "model.safetensors",
692
- "model.layers.8.mlp.gate_proj.biases": "model.safetensors",
693
- "model.layers.8.mlp.gate_proj.scales": "model.safetensors",
694
- "model.layers.8.mlp.gate_proj.weight": "model.safetensors",
695
- "model.layers.8.mlp.up_proj.biases": "model.safetensors",
696
- "model.layers.8.mlp.up_proj.scales": "model.safetensors",
697
- "model.layers.8.mlp.up_proj.weight": "model.safetensors",
698
- "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
699
- "model.layers.8.self_attn.k_proj.bias": "model.safetensors",
700
- "model.layers.8.self_attn.k_proj.biases": "model.safetensors",
701
- "model.layers.8.self_attn.k_proj.scales": "model.safetensors",
702
- "model.layers.8.self_attn.k_proj.weight": "model.safetensors",
703
- "model.layers.8.self_attn.o_proj.biases": "model.safetensors",
704
- "model.layers.8.self_attn.o_proj.scales": "model.safetensors",
705
- "model.layers.8.self_attn.o_proj.weight": "model.safetensors",
706
- "model.layers.8.self_attn.q_proj.bias": "model.safetensors",
707
- "model.layers.8.self_attn.q_proj.biases": "model.safetensors",
708
- "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
709
- "model.layers.8.self_attn.q_proj.weight": "model.safetensors",
710
- "model.layers.8.self_attn.v_proj.bias": "model.safetensors",
711
- "model.layers.8.self_attn.v_proj.biases": "model.safetensors",
712
- "model.layers.8.self_attn.v_proj.scales": "model.safetensors",
713
- "model.layers.8.self_attn.v_proj.weight": "model.safetensors",
714
- "model.layers.9.input_layernorm.weight": "model.safetensors",
715
- "model.layers.9.mlp.down_proj.biases": "model.safetensors",
716
- "model.layers.9.mlp.down_proj.scales": "model.safetensors",
717
- "model.layers.9.mlp.down_proj.weight": "model.safetensors",
718
- "model.layers.9.mlp.gate_proj.biases": "model.safetensors",
719
- "model.layers.9.mlp.gate_proj.scales": "model.safetensors",
720
- "model.layers.9.mlp.gate_proj.weight": "model.safetensors",
721
- "model.layers.9.mlp.up_proj.biases": "model.safetensors",
722
- "model.layers.9.mlp.up_proj.scales": "model.safetensors",
723
- "model.layers.9.mlp.up_proj.weight": "model.safetensors",
724
- "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
725
- "model.layers.9.self_attn.k_proj.bias": "model.safetensors",
726
- "model.layers.9.self_attn.k_proj.biases": "model.safetensors",
727
- "model.layers.9.self_attn.k_proj.scales": "model.safetensors",
728
- "model.layers.9.self_attn.k_proj.weight": "model.safetensors",
729
- "model.layers.9.self_attn.o_proj.biases": "model.safetensors",
730
- "model.layers.9.self_attn.o_proj.scales": "model.safetensors",
731
- "model.layers.9.self_attn.o_proj.weight": "model.safetensors",
732
- "model.layers.9.self_attn.q_proj.bias": "model.safetensors",
733
- "model.layers.9.self_attn.q_proj.biases": "model.safetensors",
734
- "model.layers.9.self_attn.q_proj.scales": "model.safetensors",
735
- "model.layers.9.self_attn.q_proj.weight": "model.safetensors",
736
- "model.layers.9.self_attn.v_proj.bias": "model.safetensors",
737
- "model.layers.9.self_attn.v_proj.biases": "model.safetensors",
738
- "model.layers.9.self_attn.v_proj.scales": "model.safetensors",
739
- "model.layers.9.self_attn.v_proj.weight": "model.safetensors",
740
- "model.norm.weight": "model.safetensors"
741
- }
742
- }
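Note: the `model.safetensors.index.json` files deleted in this commit are standard safetensors sharding manifests: `metadata.total_size` records the checkpoint's total byte size, and `weight_map` maps every tensor name to the shard file that stores it (a single `model.safetensors` for the unsharded quants, `model-0000X-of-00002.safetensors` for the sharded 6bit quant below). The `.scales`/`.biases` entries next to each quantized `.weight` are, to our understanding, MLX's per-group affine quantization parameters. A minimal sketch of how a loader can resolve such an index (file path illustrative, not tied to this repo):

```python
import json
from collections import defaultdict

# Group tensor names by the shard file that holds them,
# so each shard needs to be opened only once.
with open("model.safetensors.index.json") as f:
    index = json.load(f)

tensors_per_shard = defaultdict(list)
for tensor_name, shard_file in index["weight_map"].items():
    tensors_per_shard[shard_file].append(tensor_name)

for shard_file, names in sorted(tensors_per_shard.items()):
    print(f"{shard_file}: {len(names)} tensors")
```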
DeepSeek-R1-Distill-Qwen-7B-4bit/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
- {
- "bos_token": {
- "content": "<|begin▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "eos_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "pad_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- }
- }
DeepSeek-R1-Distill-Qwen-7B-4bit/tokenizer.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
- size 11422778
DeepSeek-R1-Distill-Qwen-7B-4bit/tokenizer_config.json DELETED
@@ -1,195 +0,0 @@
- {
- "add_bos_token": true,
- "add_eos_token": false,
- "add_prefix_space": null,
- "added_tokens_decoder": {
- "151643": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151644": {
- "content": "<|User|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151645": {
- "content": "<|Assistant|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151646": {
- "content": "<|begin▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151647": {
- "content": "<|EOT|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151648": {
- "content": "<think>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151649": {
- "content": "</think>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151650": {
- "content": "<|quad_start|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151651": {
- "content": "<|quad_end|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151652": {
- "content": "<|vision_start|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151653": {
- "content": "<|vision_end|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151654": {
- "content": "<|vision_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151655": {
- "content": "<|image_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151656": {
- "content": "<|video_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": true
- },
- "151657": {
- "content": "<tool_call>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151658": {
- "content": "</tool_call>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151659": {
- "content": "<|fim_prefix|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151660": {
- "content": "<|fim_middle|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151661": {
- "content": "<|fim_suffix|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151662": {
- "content": "<|fim_pad|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151663": {
- "content": "<|repo_name|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- },
- "151664": {
- "content": "<|file_sep|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
- }
- },
- "bos_token": "<|begin▁of▁sentence|>",
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
- "clean_up_tokenization_spaces": false,
- "eos_token": "<|end▁of▁sentence|>",
- "extra_special_tokens": {},
- "legacy": true,
- "model_max_length": 16384,
- "pad_token": "<|end▁of▁sentence|>",
- "sp_model_kwargs": {},
- "tokenizer_class": "LlamaTokenizerFast",
- "unk_token": null,
- "use_default_system_prompt": false
- }
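Note: the `chat_template` in the deleted tokenizer_config.json above is the DeepSeek-R1 Jinja template, which strips any `<think>…</think>` span from prior assistant turns and, when a generation prompt is requested, ends the rendered prompt with `<|Assistant|><think>\n`. A minimal sketch of rendering it, assuming the `transformers` package and a local copy of one of these quant folders (path illustrative):

```python
from transformers import AutoTokenizer

# Any of the quant folders carries the same tokenizer_config.json,
# and therefore the same chat template.
tok = AutoTokenizer.from_pretrained("DeepSeek-R1-Distill-Qwen-7B-4bit")

messages = [{"role": "user", "content": "What is 2 + 2?"}]
# add_generation_prompt=True appends "<|Assistant|><think>\n" per the template.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```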
DeepSeek-R1-Distill-Qwen-7B-6bit/config.json DELETED
@@ -1,36 +0,0 @@
- {
- "architectures": [
- "Qwen2ForCausalLM"
- ],
- "attention_dropout": 0.0,
- "bos_token_id": 151643,
- "eos_token_id": 151643,
- "hidden_act": "silu",
- "hidden_size": 3584,
- "initializer_range": 0.02,
- "intermediate_size": 18944,
- "max_position_embeddings": 131072,
- "max_window_layers": 28,
- "model_type": "qwen2",
- "num_attention_heads": 28,
- "num_hidden_layers": 28,
- "num_key_value_heads": 4,
- "quantization": {
- "group_size": 64,
- "bits": 6
- },
- "quantization_config": {
- "group_size": 64,
- "bits": 6
- },
- "rms_norm_eps": 1e-06,
- "rope_theta": 10000,
- "sliding_window": 4096,
- "tie_word_embeddings": false,
- "torch_dtype": "bfloat16",
- "transformers_version": "4.44.0",
- "use_cache": true,
- "use_mrope": false,
- "use_sliding_window": false,
- "vocab_size": 152064
- }
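Note: the paired `quantization`/`quantization_config` entries above (`group_size: 64`, `bits: 6`) are what MLX reads to reconstruct the quantized layers. A folder with this layout is typically produced and consumed with the `mlx-lm` package; a hedged sketch, with paths illustrative and flags as found in recent `mlx-lm` releases:

```python
# Producing a quant folder like this one is roughly:
#   mlx_lm.convert --hf-path deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
#       -q --q-bits 6 --q-group-size 64 --mlx-path DeepSeek-R1-Distill-Qwen-7B-6bit
from mlx_lm import load, generate

# Load the local quant folder and run a short generation.
model, tokenizer = load("DeepSeek-R1-Distill-Qwen-7B-6bit")
print(generate(model, tokenizer, prompt="Hello", max_tokens=32))
```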
DeepSeek-R1-Distill-Qwen-7B-6bit/model-00001-of-00002.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:0278656b74a07c501a2f771f7b52b9a0d663267f8985276116990573ab96b514
- size 5366596284
DeepSeek-R1-Distill-Qwen-7B-6bit/model-00002-of-00002.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:05f9da56da40bd4d306345700e0a3be1f90709a2372568810d5fc7681138a532
- size 821570604
DeepSeek-R1-Distill-Qwen-7B-6bit/model.safetensors.index.json DELETED
@@ -1,742 +0,0 @@
- {
- "metadata": {
- "total_size": 6188084224
- },
- "weight_map": {
- "lm_head.biases": "model-00002-of-00002.safetensors",
- "lm_head.scales": "model-00002-of-00002.safetensors",
- "lm_head.weight": "model-00002-of-00002.safetensors",
- "model.embed_tokens.biases": "model-00001-of-00002.safetensors",
- "model.embed_tokens.scales": "model-00001-of-00002.safetensors",
- "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
- "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
- "model.layers.0.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.0.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.0.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.0.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.0.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.0.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
- "model.layers.1.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.1.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.1.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.1.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.1.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.1.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
- "model.layers.10.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.10.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.10.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.10.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.10.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.10.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
- "model.layers.11.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.11.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.11.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.11.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.11.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.11.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
- "model.layers.12.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.12.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
- "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
- "model.layers.12.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
- "model.layers.12.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
122
- "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
123
- "model.layers.12.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
124
- "model.layers.12.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
125
- "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
126
- "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
127
- "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
128
- "model.layers.12.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
129
- "model.layers.12.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
130
- "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
131
- "model.layers.12.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
132
- "model.layers.12.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
133
- "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
134
- "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
135
- "model.layers.12.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
136
- "model.layers.12.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
137
- "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
138
- "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
139
- "model.layers.12.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
140
- "model.layers.12.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
141
- "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
142
- "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
143
- "model.layers.13.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
144
- "model.layers.13.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
145
- "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
146
- "model.layers.13.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
147
- "model.layers.13.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
148
- "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
149
- "model.layers.13.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
150
- "model.layers.13.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
151
- "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
152
- "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
153
- "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
154
- "model.layers.13.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
155
- "model.layers.13.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
156
- "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
157
- "model.layers.13.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
158
- "model.layers.13.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
159
- "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
160
- "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
161
- "model.layers.13.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
162
- "model.layers.13.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
163
- "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
164
- "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
165
- "model.layers.13.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
166
- "model.layers.13.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
167
- "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
168
- "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
169
- "model.layers.14.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
170
- "model.layers.14.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
171
- "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
172
- "model.layers.14.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
173
- "model.layers.14.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
174
- "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
175
- "model.layers.14.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
176
- "model.layers.14.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
177
- "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
178
- "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
179
- "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
180
- "model.layers.14.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
181
- "model.layers.14.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
182
- "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
183
- "model.layers.14.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
184
- "model.layers.14.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
185
- "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
186
- "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
187
- "model.layers.14.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
188
- "model.layers.14.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
189
- "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
190
- "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
191
- "model.layers.14.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
192
- "model.layers.14.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
193
- "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
194
- "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
195
- "model.layers.15.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
196
- "model.layers.15.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
197
- "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
198
- "model.layers.15.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
199
- "model.layers.15.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
200
- "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
201
- "model.layers.15.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
202
- "model.layers.15.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
203
- "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
204
- "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
205
- "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
206
- "model.layers.15.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
207
- "model.layers.15.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
208
- "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
209
- "model.layers.15.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
210
- "model.layers.15.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
211
- "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
212
- "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
213
- "model.layers.15.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
214
- "model.layers.15.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
215
- "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
216
- "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
217
- "model.layers.15.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
218
- "model.layers.15.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
219
- "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
220
- "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
221
- "model.layers.16.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
222
- "model.layers.16.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
223
- "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
224
- "model.layers.16.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
225
- "model.layers.16.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
226
- "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
227
- "model.layers.16.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
228
- "model.layers.16.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
229
- "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
230
- "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
231
- "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
232
- "model.layers.16.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
233
- "model.layers.16.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
234
- "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
235
- "model.layers.16.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
236
- "model.layers.16.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
237
- "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
238
- "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
239
- "model.layers.16.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
240
- "model.layers.16.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
241
- "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
242
- "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
243
- "model.layers.16.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
244
- "model.layers.16.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
245
- "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
246
- "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
247
- "model.layers.17.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
248
- "model.layers.17.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
249
- "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
250
- "model.layers.17.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
251
- "model.layers.17.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
252
- "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
253
- "model.layers.17.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
254
- "model.layers.17.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
255
- "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
256
- "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
257
- "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
258
- "model.layers.17.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
259
- "model.layers.17.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
260
- "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
261
- "model.layers.17.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
262
- "model.layers.17.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
263
- "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
264
- "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
265
- "model.layers.17.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
266
- "model.layers.17.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
267
- "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
268
- "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
269
- "model.layers.17.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
270
- "model.layers.17.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
271
- "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
272
- "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
273
- "model.layers.18.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
274
- "model.layers.18.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
275
- "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
276
- "model.layers.18.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
277
- "model.layers.18.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
278
- "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
279
- "model.layers.18.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
280
- "model.layers.18.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
281
- "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
282
- "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
283
- "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
284
- "model.layers.18.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
285
- "model.layers.18.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
286
- "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
287
- "model.layers.18.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
288
- "model.layers.18.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
289
- "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
290
- "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
291
- "model.layers.18.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
292
- "model.layers.18.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
293
- "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
294
- "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
295
- "model.layers.18.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
296
- "model.layers.18.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
297
- "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
298
- "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
299
- "model.layers.19.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
300
- "model.layers.19.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
301
- "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
302
- "model.layers.19.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
303
- "model.layers.19.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
304
- "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
305
- "model.layers.19.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
306
- "model.layers.19.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
307
- "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
308
- "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
309
- "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
310
- "model.layers.19.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
311
- "model.layers.19.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
312
- "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
313
- "model.layers.19.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
314
- "model.layers.19.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
315
- "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
316
- "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
317
- "model.layers.19.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
318
- "model.layers.19.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
319
- "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
320
- "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
321
- "model.layers.19.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
322
- "model.layers.19.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
323
- "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
324
- "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
325
- "model.layers.2.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
326
- "model.layers.2.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
327
- "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
328
- "model.layers.2.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
329
- "model.layers.2.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
330
- "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
331
- "model.layers.2.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
332
- "model.layers.2.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
333
- "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
334
- "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
335
- "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
336
- "model.layers.2.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
337
- "model.layers.2.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
338
- "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
339
- "model.layers.2.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
340
- "model.layers.2.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
341
- "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
342
- "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
343
- "model.layers.2.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
344
- "model.layers.2.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
345
- "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
346
- "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
347
- "model.layers.2.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
348
- "model.layers.2.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
349
- "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
350
- "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
351
- "model.layers.20.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
352
- "model.layers.20.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
353
- "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
354
- "model.layers.20.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
355
- "model.layers.20.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
356
- "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
357
- "model.layers.20.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
358
- "model.layers.20.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
359
- "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
360
- "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
361
- "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
362
- "model.layers.20.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
363
- "model.layers.20.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
364
- "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
365
- "model.layers.20.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
366
- "model.layers.20.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
367
- "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
368
- "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
369
- "model.layers.20.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
370
- "model.layers.20.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
371
- "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
372
- "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
373
- "model.layers.20.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
374
- "model.layers.20.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
375
- "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
376
- "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
377
- "model.layers.21.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
378
- "model.layers.21.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
379
- "model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
380
- "model.layers.21.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
381
- "model.layers.21.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
382
- "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
383
- "model.layers.21.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
384
- "model.layers.21.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
385
- "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
386
- "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
387
- "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
388
- "model.layers.21.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
389
- "model.layers.21.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
390
- "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
391
- "model.layers.21.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
392
- "model.layers.21.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
393
- "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
394
- "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
395
- "model.layers.21.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
396
- "model.layers.21.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
397
- "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
398
- "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
399
- "model.layers.21.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
400
- "model.layers.21.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
401
- "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
402
- "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
403
- "model.layers.22.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
404
- "model.layers.22.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
405
- "model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
406
- "model.layers.22.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
407
- "model.layers.22.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
408
- "model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
409
- "model.layers.22.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
410
- "model.layers.22.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
411
- "model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
412
- "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
413
- "model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
414
- "model.layers.22.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
415
- "model.layers.22.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
416
- "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
417
- "model.layers.22.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
418
- "model.layers.22.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
419
- "model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
420
- "model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
421
- "model.layers.22.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
422
- "model.layers.22.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
423
- "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
424
- "model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
425
- "model.layers.22.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
426
- "model.layers.22.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
427
- "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
428
- "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
429
- "model.layers.23.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
430
- "model.layers.23.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
431
- "model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
432
- "model.layers.23.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
433
- "model.layers.23.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
434
- "model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
435
- "model.layers.23.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
436
- "model.layers.23.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
437
- "model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
438
- "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
439
- "model.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
440
- "model.layers.23.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
441
- "model.layers.23.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
442
- "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
443
- "model.layers.23.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
444
- "model.layers.23.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
445
- "model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
446
- "model.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
447
- "model.layers.23.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
448
- "model.layers.23.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
449
- "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
450
- "model.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
451
- "model.layers.23.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
452
- "model.layers.23.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
453
- "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
454
- "model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
455
- "model.layers.24.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
456
- "model.layers.24.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
457
- "model.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
458
- "model.layers.24.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
459
- "model.layers.24.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
460
- "model.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
461
- "model.layers.24.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
462
- "model.layers.24.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
463
- "model.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
464
- "model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
465
- "model.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
466
- "model.layers.24.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
467
- "model.layers.24.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
468
- "model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
469
- "model.layers.24.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
470
- "model.layers.24.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
471
- "model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
472
- "model.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
473
- "model.layers.24.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
474
- "model.layers.24.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
475
- "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
476
- "model.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
477
- "model.layers.24.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
478
- "model.layers.24.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
479
- "model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
480
- "model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
481
- "model.layers.25.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
482
- "model.layers.25.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
483
- "model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
484
- "model.layers.25.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
485
- "model.layers.25.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
486
- "model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
487
- "model.layers.25.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
488
- "model.layers.25.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
489
- "model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
490
- "model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
491
- "model.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
492
- "model.layers.25.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
493
- "model.layers.25.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
494
- "model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
495
- "model.layers.25.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
496
- "model.layers.25.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
497
- "model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
498
- "model.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
499
- "model.layers.25.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
500
- "model.layers.25.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
501
- "model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
502
- "model.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
503
- "model.layers.25.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
504
- "model.layers.25.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
505
- "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
506
- "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
507
- "model.layers.26.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
508
- "model.layers.26.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
509
- "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
510
- "model.layers.26.mlp.gate_proj.biases": "model-00002-of-00002.safetensors",
511
- "model.layers.26.mlp.gate_proj.scales": "model-00002-of-00002.safetensors",
512
- "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
513
- "model.layers.26.mlp.up_proj.biases": "model-00002-of-00002.safetensors",
514
- "model.layers.26.mlp.up_proj.scales": "model-00002-of-00002.safetensors",
515
- "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
516
- "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
517
- "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
518
- "model.layers.26.self_attn.k_proj.biases": "model-00002-of-00002.safetensors",
519
- "model.layers.26.self_attn.k_proj.scales": "model-00002-of-00002.safetensors",
520
- "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
521
- "model.layers.26.self_attn.o_proj.biases": "model-00002-of-00002.safetensors",
522
- "model.layers.26.self_attn.o_proj.scales": "model-00002-of-00002.safetensors",
523
- "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
524
- "model.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
525
- "model.layers.26.self_attn.q_proj.biases": "model-00002-of-00002.safetensors",
526
- "model.layers.26.self_attn.q_proj.scales": "model-00002-of-00002.safetensors",
527
- "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
528
- "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
529
- "model.layers.26.self_attn.v_proj.biases": "model-00002-of-00002.safetensors",
530
- "model.layers.26.self_attn.v_proj.scales": "model-00002-of-00002.safetensors",
531
- "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
532
- "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
533
- "model.layers.27.mlp.down_proj.biases": "model-00002-of-00002.safetensors",
534
- "model.layers.27.mlp.down_proj.scales": "model-00002-of-00002.safetensors",
535
- "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
536
- "model.layers.27.mlp.gate_proj.biases": "model-00002-of-00002.safetensors",
537
- "model.layers.27.mlp.gate_proj.scales": "model-00002-of-00002.safetensors",
538
- "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
539
- "model.layers.27.mlp.up_proj.biases": "model-00002-of-00002.safetensors",
540
- "model.layers.27.mlp.up_proj.scales": "model-00002-of-00002.safetensors",
541
- "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
542
- "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
543
- "model.layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
544
- "model.layers.27.self_attn.k_proj.biases": "model-00002-of-00002.safetensors",
545
- "model.layers.27.self_attn.k_proj.scales": "model-00002-of-00002.safetensors",
546
- "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
547
- "model.layers.27.self_attn.o_proj.biases": "model-00002-of-00002.safetensors",
548
- "model.layers.27.self_attn.o_proj.scales": "model-00002-of-00002.safetensors",
549
- "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
550
- "model.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
551
- "model.layers.27.self_attn.q_proj.biases": "model-00002-of-00002.safetensors",
552
- "model.layers.27.self_attn.q_proj.scales": "model-00002-of-00002.safetensors",
553
- "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
554
- "model.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
555
- "model.layers.27.self_attn.v_proj.biases": "model-00002-of-00002.safetensors",
556
- "model.layers.27.self_attn.v_proj.scales": "model-00002-of-00002.safetensors",
557
- "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
558
- "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
559
- "model.layers.3.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
560
- "model.layers.3.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
561
- "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
562
- "model.layers.3.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
563
- "model.layers.3.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
564
- "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
565
- "model.layers.3.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
566
- "model.layers.3.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
567
- "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
568
- "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
569
- "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
570
- "model.layers.3.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
571
- "model.layers.3.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
572
- "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
573
- "model.layers.3.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
574
- "model.layers.3.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
575
- "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
576
- "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
577
- "model.layers.3.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
578
- "model.layers.3.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
579
- "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
580
- "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
581
- "model.layers.3.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
582
- "model.layers.3.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
583
- "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
584
- "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
585
- "model.layers.4.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
586
- "model.layers.4.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
587
- "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
588
- "model.layers.4.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
589
- "model.layers.4.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
590
- "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
591
- "model.layers.4.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
592
- "model.layers.4.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
593
- "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
594
- "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
595
- "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
596
- "model.layers.4.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
597
- "model.layers.4.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
598
- "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
599
- "model.layers.4.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
600
- "model.layers.4.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
601
- "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
602
- "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
603
- "model.layers.4.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
604
- "model.layers.4.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
605
- "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
606
- "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
607
- "model.layers.4.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
608
- "model.layers.4.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
609
- "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
610
- "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
611
- "model.layers.5.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
612
- "model.layers.5.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
613
- "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
614
- "model.layers.5.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
615
- "model.layers.5.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
616
- "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
617
- "model.layers.5.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
618
- "model.layers.5.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
619
- "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
620
- "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
621
- "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
622
- "model.layers.5.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
623
- "model.layers.5.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
624
- "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
625
- "model.layers.5.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
626
- "model.layers.5.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
627
- "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
628
- "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
629
- "model.layers.5.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
630
- "model.layers.5.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
631
- "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
632
- "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
633
- "model.layers.5.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
634
- "model.layers.5.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
635
- "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
636
- "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
637
- "model.layers.6.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
638
- "model.layers.6.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
639
- "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
640
- "model.layers.6.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
641
- "model.layers.6.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
642
- "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
643
- "model.layers.6.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
644
- "model.layers.6.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
645
- "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
646
- "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
647
- "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
648
- "model.layers.6.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
649
- "model.layers.6.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
650
- "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
651
- "model.layers.6.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
652
- "model.layers.6.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
653
- "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
654
- "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
655
- "model.layers.6.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
656
- "model.layers.6.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
657
- "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
658
- "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
659
- "model.layers.6.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
660
- "model.layers.6.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
661
- "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
662
- "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
663
- "model.layers.7.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
664
- "model.layers.7.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
665
- "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
666
- "model.layers.7.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
667
- "model.layers.7.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
668
- "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
669
- "model.layers.7.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
670
- "model.layers.7.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
671
- "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
672
- "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
673
- "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
674
- "model.layers.7.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
675
- "model.layers.7.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
676
- "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
677
- "model.layers.7.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
678
- "model.layers.7.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
679
- "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
680
- "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
681
- "model.layers.7.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
682
- "model.layers.7.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
683
- "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
684
- "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
685
- "model.layers.7.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
686
- "model.layers.7.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
687
- "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
688
- "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
689
- "model.layers.8.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
690
- "model.layers.8.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
691
- "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
692
- "model.layers.8.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
693
- "model.layers.8.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
694
- "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
695
- "model.layers.8.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
696
- "model.layers.8.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
697
- "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
698
- "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
699
- "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
700
- "model.layers.8.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
701
- "model.layers.8.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
702
- "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
703
- "model.layers.8.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
704
- "model.layers.8.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
705
- "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
706
- "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
707
- "model.layers.8.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
708
- "model.layers.8.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
709
- "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
710
- "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
711
- "model.layers.8.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
712
- "model.layers.8.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
713
- "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
714
- "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
715
- "model.layers.9.mlp.down_proj.biases": "model-00001-of-00002.safetensors",
716
- "model.layers.9.mlp.down_proj.scales": "model-00001-of-00002.safetensors",
717
- "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
718
- "model.layers.9.mlp.gate_proj.biases": "model-00001-of-00002.safetensors",
719
- "model.layers.9.mlp.gate_proj.scales": "model-00001-of-00002.safetensors",
720
- "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
721
- "model.layers.9.mlp.up_proj.biases": "model-00001-of-00002.safetensors",
722
- "model.layers.9.mlp.up_proj.scales": "model-00001-of-00002.safetensors",
723
- "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
724
- "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
725
- "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
726
- "model.layers.9.self_attn.k_proj.biases": "model-00001-of-00002.safetensors",
727
- "model.layers.9.self_attn.k_proj.scales": "model-00001-of-00002.safetensors",
728
- "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
729
- "model.layers.9.self_attn.o_proj.biases": "model-00001-of-00002.safetensors",
730
- "model.layers.9.self_attn.o_proj.scales": "model-00001-of-00002.safetensors",
731
- "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
732
- "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
733
- "model.layers.9.self_attn.q_proj.biases": "model-00001-of-00002.safetensors",
734
- "model.layers.9.self_attn.q_proj.scales": "model-00001-of-00002.safetensors",
735
- "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
736
- "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
737
- "model.layers.9.self_attn.v_proj.biases": "model-00001-of-00002.safetensors",
738
- "model.layers.9.self_attn.v_proj.scales": "model-00001-of-00002.safetensors",
739
- "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
740
- "model.norm.weight": "model-00002-of-00002.safetensors"
741
- }
742
- }
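The deleted file above is the standard safetensors sharding manifest: its "weight_map" object maps every tensor name to the shard that stores it, and the MLX quantization is visible in the extra "scales" and "biases" tensors saved next to each quantized "weight". In this hunk, layers up to 25 resolve to the first shard while layers 26-27 and the final norm resolve to the second. As a hedged illustration (the local path below is hypothetical, standing in for a checkout of the deleted 6bit folder), a stdlib-only sketch of resolving tensors to shards:

import json
from collections import Counter

# Hypothetical local copy of one of the deleted quant folders.
index_path = "DeepSeek-R1-Distill-Qwen-7B-6bit/model.safetensors.index.json"

with open(index_path) as f:
    index = json.load(f)

weight_map = index["weight_map"]  # tensor name -> shard filename

# Which shard holds a given tensor?
print(weight_map["model.layers.26.self_attn.q_proj.scales"])
# -> "model-00002-of-00002.safetensors" (matching the diff above)

# How many tensors live in each shard?
print(Counter(weight_map.values()))

Loaders such as mlx_lm's load() presumably consult this same map to pull each tensor from the right shard, which is why the index must be deleted together with the shard files it points at.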
DeepSeek-R1-Distill-Qwen-7B-6bit/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
- {
- "bos_token": {
- "content": "<|begin▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "eos_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "pad_token": {
- "content": "<|end▁of▁sentence|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- }
- }
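special_tokens_map.json is plain JSON, so the definitions deleted above can be read directly; note that this model reuses <|end▁of▁sentence|> as both EOS and pad token. A minimal sketch, assuming the same hypothetical local path as before:

import json

with open("DeepSeek-R1-Distill-Qwen-7B-6bit/special_tokens_map.json") as f:
    special = json.load(f)

# Each entry is an AddedToken-style dict; "content" holds the literal string.
bos = special["bos_token"]["content"]  # "<|begin▁of▁sentence|>"
eos = special["eos_token"]["content"]  # "<|end▁of▁sentence|>"
pad = special["pad_token"]["content"]  # same string as eos here
print(bos, eos, pad)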
DeepSeek-R1-Distill-Qwen-7B-6bit/tokenizer.json DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
- size 11422778
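The three lines above are not the tokenizer itself but a Git LFS pointer: a spec version line, the SHA-256 of the real tokenizer.json (about 11 MB, per the size field in bytes), and that size. A small hedged sketch of parsing such a pointer:

def parse_lfs_pointer(text: str) -> dict:
    """Split a Git LFS pointer file into its key/value fields."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893
size 11422778"""

info = parse_lfs_pointer(pointer)
print(info["oid"], int(info["size"]))  # hash and byte count of the real file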