Update config.json
config.json CHANGED: +196 -196
Every hunk in this commit makes the same one-line change: the "enable_perm" flag of a quantized projection entry in config.json is set to false. The change touches 196 entries, the seven projections (mlp.down_proj, mlp.gate_proj, mlp.up_proj, self_attn.k_proj, self_attn.o_proj, self_attn.q_proj, self_attn.v_proj) in each of the 28 decoder layers model.layers.0 through model.layers.27, which matches the +196 -196 line count. A representative hunk (the old value of the removed line is truncated in the rendered diff and does not survive):

@@ -32,7 +32,7 @@
    "model.layers.0.mlp.down_proj": {
      "bias": null,
      "enable_norm": true,
-     "enable_perm":
+     "enable_perm": false,
      "group_num": 1,
      "group_size": 18944,
      "in_features": 18944,

The context lines are identical across all 196 hunks up to the module name and sizes: "bias" is true for the q_proj, k_proj, and v_proj entries and null for o_proj and the three MLP projections; "enable_norm" is true everywhere; "group_num" is 1 everywhere; and "group_size" and "in_features" are 18944 for mlp.down_proj and 3584 for the other six projections.
"model.layers.17.self_attn.v_proj": {
|
1689 |
"bias": true,
|
1690 |
"enable_norm": true,
|
1691 |
+
"enable_perm": false,
|
1692 |
"group_num": 1,
|
1693 |
"group_size": 3584,
|
1694 |
"in_features": 3584,
|
|
|
1712 |
"model.layers.18.mlp.down_proj": {
|
1713 |
"bias": null,
|
1714 |
"enable_norm": true,
|
1715 |
+
"enable_perm": false,
|
1716 |
"group_num": 1,
|
1717 |
"group_size": 18944,
|
1718 |
"in_features": 18944,
|
|
|
1736 |
"model.layers.18.mlp.gate_proj": {
|
1737 |
"bias": null,
|
1738 |
"enable_norm": true,
|
1739 |
+
"enable_perm": false,
|
1740 |
"group_num": 1,
|
1741 |
"group_size": 3584,
|
1742 |
"in_features": 3584,
|
|
|
1760 |
"model.layers.18.mlp.up_proj": {
|
1761 |
"bias": null,
|
1762 |
"enable_norm": true,
|
1763 |
+
"enable_perm": false,
|
1764 |
"group_num": 1,
|
1765 |
"group_size": 3584,
|
1766 |
"in_features": 3584,
|
|
|
1784 |
"model.layers.18.self_attn.k_proj": {
|
1785 |
"bias": true,
|
1786 |
"enable_norm": true,
|
1787 |
+
"enable_perm": false,
|
1788 |
"group_num": 1,
|
1789 |
"group_size": 3584,
|
1790 |
"in_features": 3584,
|
|
|
1808 |
"model.layers.18.self_attn.o_proj": {
|
1809 |
"bias": null,
|
1810 |
"enable_norm": true,
|
1811 |
+
"enable_perm": false,
|
1812 |
"group_num": 1,
|
1813 |
"group_size": 3584,
|
1814 |
"in_features": 3584,
|
|
|
1832 |
"model.layers.18.self_attn.q_proj": {
|
1833 |
"bias": true,
|
1834 |
"enable_norm": true,
|
1835 |
+
"enable_perm": false,
|
1836 |
"group_num": 1,
|
1837 |
"group_size": 3584,
|
1838 |
"in_features": 3584,
|
|
|
1856 |
"model.layers.18.self_attn.v_proj": {
|
1857 |
"bias": true,
|
1858 |
"enable_norm": true,
|
1859 |
+
"enable_perm": false,
|
1860 |
"group_num": 1,
|
1861 |
"group_size": 3584,
|
1862 |
"in_features": 3584,
|
|
|
1880 |
"model.layers.19.mlp.down_proj": {
|
1881 |
"bias": null,
|
1882 |
"enable_norm": true,
|
1883 |
+
"enable_perm": false,
|
1884 |
"group_num": 1,
|
1885 |
"group_size": 18944,
|
1886 |
"in_features": 18944,
|
|
|
1904 |
"model.layers.19.mlp.gate_proj": {
|
1905 |
"bias": null,
|
1906 |
"enable_norm": true,
|
1907 |
+
"enable_perm": false,
|
1908 |
"group_num": 1,
|
1909 |
"group_size": 3584,
|
1910 |
"in_features": 3584,
|
|
|
1928 |
"model.layers.19.mlp.up_proj": {
|
1929 |
"bias": null,
|
1930 |
"enable_norm": true,
|
1931 |
+
"enable_perm": false,
|
1932 |
"group_num": 1,
|
1933 |
"group_size": 3584,
|
1934 |
"in_features": 3584,
|
|
|
1952 |
"model.layers.19.self_attn.k_proj": {
|
1953 |
"bias": true,
|
1954 |
"enable_norm": true,
|
1955 |
+
"enable_perm": false,
|
1956 |
"group_num": 1,
|
1957 |
"group_size": 3584,
|
1958 |
"in_features": 3584,
|
|
|
1976 |
"model.layers.19.self_attn.o_proj": {
|
1977 |
"bias": null,
|
1978 |
"enable_norm": true,
|
1979 |
+
"enable_perm": false,
|
1980 |
"group_num": 1,
|
1981 |
"group_size": 3584,
|
1982 |
"in_features": 3584,
|
|
|
2000 |
"model.layers.19.self_attn.q_proj": {
|
2001 |
"bias": true,
|
2002 |
"enable_norm": true,
|
2003 |
+
"enable_perm": false,
|
2004 |
"group_num": 1,
|
2005 |
"group_size": 3584,
|
2006 |
"in_features": 3584,
|
|
|
2024 |
"model.layers.19.self_attn.v_proj": {
|
2025 |
"bias": true,
|
2026 |
"enable_norm": true,
|
2027 |
+
"enable_perm": false,
|
2028 |
"group_num": 1,
|
2029 |
"group_size": 3584,
|
2030 |
"in_features": 3584,
|
|
|
2048 |
"model.layers.2.mlp.down_proj": {
|
2049 |
"bias": null,
|
2050 |
"enable_norm": true,
|
2051 |
+
"enable_perm": false,
|
2052 |
"group_num": 1,
|
2053 |
"group_size": 18944,
|
2054 |
"in_features": 18944,
|
|
|
2072 |
"model.layers.2.mlp.gate_proj": {
|
2073 |
"bias": null,
|
2074 |
"enable_norm": true,
|
2075 |
+
"enable_perm": false,
|
2076 |
"group_num": 1,
|
2077 |
"group_size": 3584,
|
2078 |
"in_features": 3584,
|
|
|
2096 |
"model.layers.2.mlp.up_proj": {
|
2097 |
"bias": null,
|
2098 |
"enable_norm": true,
|
2099 |
+
"enable_perm": false,
|
2100 |
"group_num": 1,
|
2101 |
"group_size": 3584,
|
2102 |
"in_features": 3584,
|
|
|
2120 |
"model.layers.2.self_attn.k_proj": {
|
2121 |
"bias": true,
|
2122 |
"enable_norm": true,
|
2123 |
+
"enable_perm": false,
|
2124 |
"group_num": 1,
|
2125 |
"group_size": 3584,
|
2126 |
"in_features": 3584,
|
|
|
2144 |
"model.layers.2.self_attn.o_proj": {
|
2145 |
"bias": null,
|
2146 |
"enable_norm": true,
|
2147 |
+
"enable_perm": false,
|
2148 |
"group_num": 1,
|
2149 |
"group_size": 3584,
|
2150 |
"in_features": 3584,
|
|
|
2168 |
"model.layers.2.self_attn.q_proj": {
|
2169 |
"bias": true,
|
2170 |
"enable_norm": true,
|
2171 |
+
"enable_perm": false,
|
2172 |
"group_num": 1,
|
2173 |
"group_size": 3584,
|
2174 |
"in_features": 3584,
|
|
|
2192 |
"model.layers.2.self_attn.v_proj": {
|
2193 |
"bias": true,
|
2194 |
"enable_norm": true,
|
2195 |
+
"enable_perm": false,
|
2196 |
"group_num": 1,
|
2197 |
"group_size": 3584,
|
2198 |
"in_features": 3584,
|
|
|
2216 |
"model.layers.20.mlp.down_proj": {
|
2217 |
"bias": null,
|
2218 |
"enable_norm": true,
|
2219 |
+
"enable_perm": false,
|
2220 |
"group_num": 1,
|
2221 |
"group_size": 18944,
|
2222 |
"in_features": 18944,
|
|
|
2240 |
"model.layers.20.mlp.gate_proj": {
|
2241 |
"bias": null,
|
2242 |
"enable_norm": true,
|
2243 |
+
"enable_perm": false,
|
2244 |
"group_num": 1,
|
2245 |
"group_size": 3584,
|
2246 |
"in_features": 3584,
|
|
|
2264 |
"model.layers.20.mlp.up_proj": {
|
2265 |
"bias": null,
|
2266 |
"enable_norm": true,
|
2267 |
+
"enable_perm": false,
|
2268 |
"group_num": 1,
|
2269 |
"group_size": 3584,
|
2270 |
"in_features": 3584,
|
|
|
2288 |
"model.layers.20.self_attn.k_proj": {
|
2289 |
"bias": true,
|
2290 |
"enable_norm": true,
|
2291 |
+
"enable_perm": false,
|
2292 |
"group_num": 1,
|
2293 |
"group_size": 3584,
|
2294 |
"in_features": 3584,
|
|
|
2312 |
"model.layers.20.self_attn.o_proj": {
|
2313 |
"bias": null,
|
2314 |
"enable_norm": true,
|
2315 |
+
"enable_perm": false,
|
2316 |
"group_num": 1,
|
2317 |
"group_size": 3584,
|
2318 |
"in_features": 3584,
|
|
|
2336 |
"model.layers.20.self_attn.q_proj": {
|
2337 |
"bias": true,
|
2338 |
"enable_norm": true,
|
2339 |
+
"enable_perm": false,
|
2340 |
"group_num": 1,
|
2341 |
"group_size": 3584,
|
2342 |
"in_features": 3584,
|
|
|
2360 |
"model.layers.20.self_attn.v_proj": {
|
2361 |
"bias": true,
|
2362 |
"enable_norm": true,
|
2363 |
+
"enable_perm": false,
|
2364 |
"group_num": 1,
|
2365 |
"group_size": 3584,
|
2366 |
"in_features": 3584,
|
|
|
2384 |
"model.layers.21.mlp.down_proj": {
|
2385 |
"bias": null,
|
2386 |
"enable_norm": true,
|
2387 |
+
"enable_perm": false,
|
2388 |
"group_num": 1,
|
2389 |
"group_size": 18944,
|
2390 |
"in_features": 18944,
|
|
|
2408 |
"model.layers.21.mlp.gate_proj": {
|
2409 |
"bias": null,
|
2410 |
"enable_norm": true,
|
2411 |
+
"enable_perm": false,
|
2412 |
"group_num": 1,
|
2413 |
"group_size": 3584,
|
2414 |
"in_features": 3584,
|
|
|
2432 |
"model.layers.21.mlp.up_proj": {
|
2433 |
"bias": null,
|
2434 |
"enable_norm": true,
|
2435 |
+
"enable_perm": false,
|
2436 |
"group_num": 1,
|
2437 |
"group_size": 3584,
|
2438 |
"in_features": 3584,
|
|
|
2456 |
"model.layers.21.self_attn.k_proj": {
|
2457 |
"bias": true,
|
2458 |
"enable_norm": true,
|
2459 |
+
"enable_perm": false,
|
2460 |
"group_num": 1,
|
2461 |
"group_size": 3584,
|
2462 |
"in_features": 3584,
|
|
|
2480 |
"model.layers.21.self_attn.o_proj": {
|
2481 |
"bias": null,
|
2482 |
"enable_norm": true,
|
2483 |
+
"enable_perm": false,
|
2484 |
"group_num": 1,
|
2485 |
"group_size": 3584,
|
2486 |
"in_features": 3584,
|
|
|
2504 |
"model.layers.21.self_attn.q_proj": {
|
2505 |
"bias": true,
|
2506 |
"enable_norm": true,
|
2507 |
+
"enable_perm": false,
|
2508 |
"group_num": 1,
|
2509 |
"group_size": 3584,
|
2510 |
"in_features": 3584,
|
|
|
2528 |
"model.layers.21.self_attn.v_proj": {
|
2529 |
"bias": true,
|
2530 |
"enable_norm": true,
|
2531 |
+
"enable_perm": false,
|
2532 |
"group_num": 1,
|
2533 |
"group_size": 3584,
|
2534 |
"in_features": 3584,
|
|
|
2552 |
"model.layers.22.mlp.down_proj": {
|
2553 |
"bias": null,
|
2554 |
"enable_norm": true,
|
2555 |
+
"enable_perm": false,
|
2556 |
"group_num": 1,
|
2557 |
"group_size": 18944,
|
2558 |
"in_features": 18944,
|
|
|
2576 |
"model.layers.22.mlp.gate_proj": {
|
2577 |
"bias": null,
|
2578 |
"enable_norm": true,
|
2579 |
+
"enable_perm": false,
|
2580 |
"group_num": 1,
|
2581 |
"group_size": 3584,
|
2582 |
"in_features": 3584,
|
|
|
2600 |
"model.layers.22.mlp.up_proj": {
|
2601 |
"bias": null,
|
2602 |
"enable_norm": true,
|
2603 |
+
"enable_perm": false,
|
2604 |
"group_num": 1,
|
2605 |
"group_size": 3584,
|
2606 |
"in_features": 3584,
|
|
|
2624 |
"model.layers.22.self_attn.k_proj": {
|
2625 |
"bias": true,
|
2626 |
"enable_norm": true,
|
2627 |
+
"enable_perm": false,
|
2628 |
"group_num": 1,
|
2629 |
"group_size": 3584,
|
2630 |
"in_features": 3584,
|
|
|
2648 |
"model.layers.22.self_attn.o_proj": {
|
2649 |
"bias": null,
|
2650 |
"enable_norm": true,
|
2651 |
+
"enable_perm": false,
|
2652 |
"group_num": 1,
|
2653 |
"group_size": 3584,
|
2654 |
"in_features": 3584,
|
|
|
2672 |
"model.layers.22.self_attn.q_proj": {
|
2673 |
"bias": true,
|
2674 |
"enable_norm": true,
|
2675 |
+
"enable_perm": false,
|
2676 |
"group_num": 1,
|
2677 |
"group_size": 3584,
|
2678 |
"in_features": 3584,
|
|
|
2696 |
"model.layers.22.self_attn.v_proj": {
|
2697 |
"bias": true,
|
2698 |
"enable_norm": true,
|
2699 |
+
"enable_perm": false,
|
2700 |
"group_num": 1,
|
2701 |
"group_size": 3584,
|
2702 |
"in_features": 3584,
|
|
|
2720 |
"model.layers.23.mlp.down_proj": {
|
2721 |
"bias": null,
|
2722 |
"enable_norm": true,
|
2723 |
+
"enable_perm": false,
|
2724 |
"group_num": 1,
|
2725 |
"group_size": 18944,
|
2726 |
"in_features": 18944,
|
|
|
2744 |
"model.layers.23.mlp.gate_proj": {
|
2745 |
"bias": null,
|
2746 |
"enable_norm": true,
|
2747 |
+
"enable_perm": false,
|
2748 |
"group_num": 1,
|
2749 |
"group_size": 3584,
|
2750 |
"in_features": 3584,
|
|
|
2768 |
"model.layers.23.mlp.up_proj": {
|
2769 |
"bias": null,
|
2770 |
"enable_norm": true,
|
2771 |
+
"enable_perm": false,
|
2772 |
"group_num": 1,
|
2773 |
"group_size": 3584,
|
2774 |
"in_features": 3584,
|
|
|
2792 |
"model.layers.23.self_attn.k_proj": {
|
2793 |
"bias": true,
|
2794 |
"enable_norm": true,
|
2795 |
+
"enable_perm": false,
|
2796 |
"group_num": 1,
|
2797 |
"group_size": 3584,
|
2798 |
"in_features": 3584,
|
|
|
2816 |
"model.layers.23.self_attn.o_proj": {
|
2817 |
"bias": null,
|
2818 |
"enable_norm": true,
|
2819 |
+
"enable_perm": false,
|
2820 |
"group_num": 1,
|
2821 |
"group_size": 3584,
|
2822 |
"in_features": 3584,
|
|
|
2840 |
"model.layers.23.self_attn.q_proj": {
|
2841 |
"bias": true,
|
2842 |
"enable_norm": true,
|
2843 |
+
"enable_perm": false,
|
2844 |
"group_num": 1,
|
2845 |
"group_size": 3584,
|
2846 |
"in_features": 3584,
|
|
|
2864 |
"model.layers.23.self_attn.v_proj": {
|
2865 |
"bias": true,
|
2866 |
"enable_norm": true,
|
2867 |
+
"enable_perm": false,
|
2868 |
"group_num": 1,
|
2869 |
"group_size": 3584,
|
2870 |
"in_features": 3584,
|
|
|
2888 |
"model.layers.24.mlp.down_proj": {
|
2889 |
"bias": null,
|
2890 |
"enable_norm": true,
|
2891 |
+
"enable_perm": false,
|
2892 |
"group_num": 1,
|
2893 |
"group_size": 18944,
|
2894 |
"in_features": 18944,
|
|
|
2912 |
"model.layers.24.mlp.gate_proj": {
|
2913 |
"bias": null,
|
2914 |
"enable_norm": true,
|
2915 |
+
"enable_perm": false,
|
2916 |
"group_num": 1,
|
2917 |
"group_size": 3584,
|
2918 |
"in_features": 3584,
|
|
|
2936 |
"model.layers.24.mlp.up_proj": {
|
2937 |
"bias": null,
|
2938 |
"enable_norm": true,
|
2939 |
+
"enable_perm": false,
|
2940 |
"group_num": 1,
|
2941 |
"group_size": 3584,
|
2942 |
"in_features": 3584,
|
|
|
2960 |
"model.layers.24.self_attn.k_proj": {
|
2961 |
"bias": true,
|
2962 |
"enable_norm": true,
|
2963 |
+
"enable_perm": false,
|
2964 |
"group_num": 1,
|
2965 |
"group_size": 3584,
|
2966 |
"in_features": 3584,
|
|
|
2984 |
"model.layers.24.self_attn.o_proj": {
|
2985 |
"bias": null,
|
2986 |
"enable_norm": true,
|
2987 |
+
"enable_perm": false,
|
2988 |
"group_num": 1,
|
2989 |
"group_size": 3584,
|
2990 |
"in_features": 3584,
|
|
|
3008 |
"model.layers.24.self_attn.q_proj": {
|
3009 |
"bias": true,
|
3010 |
"enable_norm": true,
|
3011 |
+
"enable_perm": false,
|
3012 |
"group_num": 1,
|
3013 |
"group_size": 3584,
|
3014 |
"in_features": 3584,
|
|
|
3032 |
"model.layers.24.self_attn.v_proj": {
|
3033 |
"bias": true,
|
3034 |
"enable_norm": true,
|
3035 |
+
"enable_perm": false,
|
3036 |
"group_num": 1,
|
3037 |
"group_size": 3584,
|
3038 |
"in_features": 3584,
|
|
|
3056 |
"model.layers.25.mlp.down_proj": {
|
3057 |
"bias": null,
|
3058 |
"enable_norm": true,
|
3059 |
+
"enable_perm": false,
|
3060 |
"group_num": 1,
|
3061 |
"group_size": 18944,
|
3062 |
"in_features": 18944,
|
|
|
3080 |
"model.layers.25.mlp.gate_proj": {
|
3081 |
"bias": null,
|
3082 |
"enable_norm": true,
|
3083 |
+
"enable_perm": false,
|
3084 |
"group_num": 1,
|
3085 |
"group_size": 3584,
|
3086 |
"in_features": 3584,
|
|
|
3104 |
"model.layers.25.mlp.up_proj": {
|
3105 |
"bias": null,
|
3106 |
"enable_norm": true,
|
3107 |
+
"enable_perm": false,
|
3108 |
"group_num": 1,
|
3109 |
"group_size": 3584,
|
3110 |
"in_features": 3584,
|
|
|
3128 |
"model.layers.25.self_attn.k_proj": {
|
3129 |
"bias": true,
|
3130 |
"enable_norm": true,
|
3131 |
+
"enable_perm": false,
|
3132 |
"group_num": 1,
|
3133 |
"group_size": 3584,
|
3134 |
"in_features": 3584,
|
|
|
3152 |
"model.layers.25.self_attn.o_proj": {
|
3153 |
"bias": null,
|
3154 |
"enable_norm": true,
|
3155 |
+
"enable_perm": false,
|
3156 |
"group_num": 1,
|
3157 |
"group_size": 3584,
|
3158 |
"in_features": 3584,
|
|
|
3176 |
"model.layers.25.self_attn.q_proj": {
|
3177 |
"bias": true,
|
3178 |
"enable_norm": true,
|
3179 |
+
"enable_perm": false,
|
3180 |
"group_num": 1,
|
3181 |
"group_size": 3584,
|
3182 |
"in_features": 3584,
|
|
|
3200 |
"model.layers.25.self_attn.v_proj": {
|
3201 |
"bias": true,
|
3202 |
"enable_norm": true,
|
3203 |
+
"enable_perm": false,
|
3204 |
"group_num": 1,
|
3205 |
"group_size": 3584,
|
3206 |
"in_features": 3584,
|
|
|
3224 |
"model.layers.26.mlp.down_proj": {
|
3225 |
"bias": null,
|
3226 |
"enable_norm": true,
|
3227 |
+
"enable_perm": false,
|
3228 |
"group_num": 1,
|
3229 |
"group_size": 18944,
|
3230 |
"in_features": 18944,
|
|
|
3248 |
"model.layers.26.mlp.gate_proj": {
|
3249 |
"bias": null,
|
3250 |
"enable_norm": true,
|
3251 |
+
"enable_perm": false,
|
3252 |
"group_num": 1,
|
3253 |
"group_size": 3584,
|
3254 |
"in_features": 3584,
|
|
|
3272 |
"model.layers.26.mlp.up_proj": {
|
3273 |
"bias": null,
|
3274 |
"enable_norm": true,
|
3275 |
+
"enable_perm": false,
|
3276 |
"group_num": 1,
|
3277 |
"group_size": 3584,
|
3278 |
"in_features": 3584,
|
|
|
3296 |
"model.layers.26.self_attn.k_proj": {
|
3297 |
"bias": true,
|
3298 |
"enable_norm": true,
|
3299 |
+
"enable_perm": false,
|
3300 |
"group_num": 1,
|
3301 |
"group_size": 3584,
|
3302 |
"in_features": 3584,
|
|
|
3320 |
"model.layers.26.self_attn.o_proj": {
|
3321 |
"bias": null,
|
3322 |
"enable_norm": true,
|
3323 |
+
"enable_perm": false,
|
3324 |
"group_num": 1,
|
3325 |
"group_size": 3584,
|
3326 |
"in_features": 3584,
|
|
|
3344 |
"model.layers.26.self_attn.q_proj": {
|
3345 |
"bias": true,
|
3346 |
"enable_norm": true,
|
3347 |
+
"enable_perm": false,
|
3348 |
"group_num": 1,
|
3349 |
"group_size": 3584,
|
3350 |
"in_features": 3584,
|
|
|
3368 |
"model.layers.26.self_attn.v_proj": {
|
3369 |
"bias": true,
|
3370 |
"enable_norm": true,
|
3371 |
+
"enable_perm": false,
|
3372 |
"group_num": 1,
|
3373 |
"group_size": 3584,
|
3374 |
"in_features": 3584,
|
|
|
3392 |
"model.layers.27.mlp.down_proj": {
|
3393 |
"bias": null,
|
3394 |
"enable_norm": true,
|
3395 |
+
"enable_perm": false,
|
3396 |
"group_num": 1,
|
3397 |
"group_size": 18944,
|
3398 |
"in_features": 18944,
|
|
|
3416 |
"model.layers.27.mlp.gate_proj": {
|
3417 |
"bias": null,
|
3418 |
"enable_norm": true,
|
3419 |
+
"enable_perm": false,
|
3420 |
"group_num": 1,
|
3421 |
"group_size": 3584,
|
3422 |
"in_features": 3584,
|
|
|
3440 |
"model.layers.27.mlp.up_proj": {
|
3441 |
"bias": null,
|
3442 |
"enable_norm": true,
|
3443 |
+
"enable_perm": false,
|
3444 |
"group_num": 1,
|
3445 |
"group_size": 3584,
|
3446 |
"in_features": 3584,
|
|
|
3464 |
"model.layers.27.self_attn.k_proj": {
|
3465 |
"bias": true,
|
3466 |
"enable_norm": true,
|
3467 |
+
"enable_perm": false,
|
3468 |
"group_num": 1,
|
3469 |
"group_size": 3584,
|
3470 |
"in_features": 3584,
|
|
|
3488 |
"model.layers.27.self_attn.o_proj": {
|
3489 |
"bias": null,
|
3490 |
"enable_norm": true,
|
3491 |
+
"enable_perm": false,
|
3492 |
"group_num": 1,
|
3493 |
"group_size": 3584,
|
3494 |
"in_features": 3584,
|
|
|
3512 |
"model.layers.27.self_attn.q_proj": {
|
3513 |
"bias": true,
|
3514 |
"enable_norm": true,
|
3515 |
+
"enable_perm": false,
|
3516 |
"group_num": 1,
|
3517 |
"group_size": 3584,
|
3518 |
"in_features": 3584,
|
|
|
3536 |
"model.layers.27.self_attn.v_proj": {
|
3537 |
"bias": true,
|
3538 |
"enable_norm": true,
|
3539 |
+
"enable_perm": false,
|
3540 |
"group_num": 1,
|
3541 |
"group_size": 3584,
|
3542 |
"in_features": 3584,
|
|
|
3560 |
"model.layers.3.mlp.down_proj": {
|
3561 |
"bias": null,
|
3562 |
"enable_norm": true,
|
3563 |
+
"enable_perm": false,
|
3564 |
"group_num": 1,
|
3565 |
"group_size": 18944,
|
3566 |
"in_features": 18944,
|
|
|
3584 |
"model.layers.3.mlp.gate_proj": {
|
3585 |
"bias": null,
|
3586 |
"enable_norm": true,
|
3587 |
+
"enable_perm": false,
|
3588 |
"group_num": 1,
|
3589 |
"group_size": 3584,
|
3590 |
"in_features": 3584,
|
|
|
3608 |
"model.layers.3.mlp.up_proj": {
|
3609 |
"bias": null,
|
3610 |
"enable_norm": true,
|
3611 |
+
"enable_perm": false,
|
3612 |
"group_num": 1,
|
3613 |
"group_size": 3584,
|
3614 |
"in_features": 3584,
|
|
|
3632 |
"model.layers.3.self_attn.k_proj": {
|
3633 |
"bias": true,
|
3634 |
"enable_norm": true,
|
3635 |
+
"enable_perm": false,
|
3636 |
"group_num": 1,
|
3637 |
"group_size": 3584,
|
3638 |
"in_features": 3584,
|
|
|
3656 |
"model.layers.3.self_attn.o_proj": {
|
3657 |
"bias": null,
|
3658 |
"enable_norm": true,
|
3659 |
+
"enable_perm": false,
|
3660 |
"group_num": 1,
|
3661 |
"group_size": 3584,
|
3662 |
"in_features": 3584,
|
|
|
3680 |
"model.layers.3.self_attn.q_proj": {
|
3681 |
"bias": true,
|
3682 |
"enable_norm": true,
|
3683 |
+
"enable_perm": false,
|
3684 |
"group_num": 1,
|
3685 |
"group_size": 3584,
|
3686 |
"in_features": 3584,
|
|
|
3704 |
"model.layers.3.self_attn.v_proj": {
|
3705 |
"bias": true,
|
3706 |
"enable_norm": true,
|
3707 |
+
"enable_perm": false,
|
3708 |
"group_num": 1,
|
3709 |
"group_size": 3584,
|
3710 |
"in_features": 3584,
|
|
|
3728 |
"model.layers.4.mlp.down_proj": {
|
3729 |
"bias": null,
|
3730 |
"enable_norm": true,
|
3731 |
+
"enable_perm": false,
|
3732 |
"group_num": 1,
|
3733 |
"group_size": 18944,
|
3734 |
"in_features": 18944,
|
|
|
3752 |
"model.layers.4.mlp.gate_proj": {
|
3753 |
"bias": null,
|
3754 |
"enable_norm": true,
|
3755 |
+
"enable_perm": false,
|
3756 |
"group_num": 1,
|
3757 |
"group_size": 3584,
|
3758 |
"in_features": 3584,
|
|
|
3776 |
"model.layers.4.mlp.up_proj": {
|
3777 |
"bias": null,
|
3778 |
"enable_norm": true,
|
3779 |
+
"enable_perm": false,
|
3780 |
"group_num": 1,
|
3781 |
"group_size": 3584,
|
3782 |
"in_features": 3584,
|
|
|
3800 |
"model.layers.4.self_attn.k_proj": {
|
3801 |
"bias": true,
|
3802 |
"enable_norm": true,
|
3803 |
+
"enable_perm": false,
|
3804 |
"group_num": 1,
|
3805 |
"group_size": 3584,
|
3806 |
"in_features": 3584,
|
|
|
3824 |
"model.layers.4.self_attn.o_proj": {
|
3825 |
"bias": null,
|
3826 |
"enable_norm": true,
|
3827 |
+
"enable_perm": false,
|
3828 |
"group_num": 1,
|
3829 |
"group_size": 3584,
|
3830 |
"in_features": 3584,
|
|
|
3848 |
"model.layers.4.self_attn.q_proj": {
|
3849 |
"bias": true,
|
3850 |
"enable_norm": true,
|
3851 |
+
"enable_perm": false,
|
3852 |
"group_num": 1,
|
3853 |
"group_size": 3584,
|
3854 |
"in_features": 3584,
|
|
|
3872 |
"model.layers.4.self_attn.v_proj": {
|
3873 |
"bias": true,
|
3874 |
"enable_norm": true,
|
3875 |
+
"enable_perm": false,
|
3876 |
"group_num": 1,
|
3877 |
"group_size": 3584,
|
3878 |
"in_features": 3584,
|
|
|
3896 |
"model.layers.5.mlp.down_proj": {
|
3897 |
"bias": null,
|
3898 |
"enable_norm": true,
|
3899 |
+
"enable_perm": false,
|
3900 |
"group_num": 1,
|
3901 |
"group_size": 18944,
|
3902 |
"in_features": 18944,
|
|
|
3920 |
"model.layers.5.mlp.gate_proj": {
|
3921 |
"bias": null,
|
3922 |
"enable_norm": true,
|
3923 |
+
"enable_perm": false,
|
3924 |
"group_num": 1,
|
3925 |
"group_size": 3584,
|
3926 |
"in_features": 3584,
|
|
|
3944 |
"model.layers.5.mlp.up_proj": {
|
3945 |
"bias": null,
|
3946 |
"enable_norm": true,
|
3947 |
+
"enable_perm": false,
|
3948 |
"group_num": 1,
|
3949 |
"group_size": 3584,
|
3950 |
"in_features": 3584,
|
|
|
3968 |
"model.layers.5.self_attn.k_proj": {
|
3969 |
"bias": true,
|
3970 |
"enable_norm": true,
|
3971 |
+
"enable_perm": false,
|
3972 |
"group_num": 1,
|
3973 |
"group_size": 3584,
|
3974 |
"in_features": 3584,
|
|
|
3992 |
"model.layers.5.self_attn.o_proj": {
|
3993 |
"bias": null,
|
3994 |
"enable_norm": true,
|
3995 |
+
"enable_perm": false,
|
3996 |
"group_num": 1,
|
3997 |
"group_size": 3584,
|
3998 |
"in_features": 3584,
|
|
|
4016 |
"model.layers.5.self_attn.q_proj": {
|
4017 |
"bias": true,
|
4018 |
"enable_norm": true,
|
4019 |
+
"enable_perm": false,
|
4020 |
"group_num": 1,
|
4021 |
"group_size": 3584,
|
4022 |
"in_features": 3584,
|
|
|
4040 |
"model.layers.5.self_attn.v_proj": {
|
4041 |
"bias": true,
|
4042 |
"enable_norm": true,
|
4043 |
+
"enable_perm": false,
|
4044 |
"group_num": 1,
|
4045 |
"group_size": 3584,
|
4046 |
"in_features": 3584,
|
|
|
4064 |
"model.layers.6.mlp.down_proj": {
|
4065 |
"bias": null,
|
4066 |
"enable_norm": true,
|
4067 |
+
"enable_perm": false,
|
4068 |
"group_num": 1,
|
4069 |
"group_size": 18944,
|
4070 |
"in_features": 18944,
|
|
|
4088 |
"model.layers.6.mlp.gate_proj": {
|
4089 |
"bias": null,
|
4090 |
"enable_norm": true,
|
4091 |
+
"enable_perm": false,
|
4092 |
"group_num": 1,
|
4093 |
"group_size": 3584,
|
4094 |
"in_features": 3584,
|
|
|
4112 |
"model.layers.6.mlp.up_proj": {
|
4113 |
"bias": null,
|
4114 |
"enable_norm": true,
|
4115 |
+
"enable_perm": false,
|
4116 |
"group_num": 1,
|
4117 |
"group_size": 3584,
|
4118 |
"in_features": 3584,
|
|
|
4136 |
"model.layers.6.self_attn.k_proj": {
|
4137 |
"bias": true,
|
4138 |
"enable_norm": true,
|
4139 |
+
"enable_perm": false,
|
4140 |
"group_num": 1,
|
4141 |
"group_size": 3584,
|
4142 |
"in_features": 3584,
|
|
|
4160 |
"model.layers.6.self_attn.o_proj": {
|
4161 |
"bias": null,
|
4162 |
"enable_norm": true,
|
4163 |
+
"enable_perm": false,
|
4164 |
"group_num": 1,
|
4165 |
"group_size": 3584,
|
4166 |
"in_features": 3584,
|
|
|
4184 |
"model.layers.6.self_attn.q_proj": {
|
4185 |
"bias": true,
|
4186 |
"enable_norm": true,
|
4187 |
+
"enable_perm": false,
|
4188 |
"group_num": 1,
|
4189 |
"group_size": 3584,
|
4190 |
"in_features": 3584,
|
|
|
4208 |
"model.layers.6.self_attn.v_proj": {
|
4209 |
"bias": true,
|
4210 |
"enable_norm": true,
|
4211 |
+
"enable_perm": false,
|
4212 |
"group_num": 1,
|
4213 |
"group_size": 3584,
|
4214 |
"in_features": 3584,
|
|
|
4232 |
"model.layers.7.mlp.down_proj": {
|
4233 |
"bias": null,
|
4234 |
"enable_norm": true,
|
4235 |
+
"enable_perm": false,
|
4236 |
"group_num": 1,
|
4237 |
"group_size": 18944,
|
4238 |
"in_features": 18944,
|
|
|
4256 |
"model.layers.7.mlp.gate_proj": {
|
4257 |
"bias": null,
|
4258 |
"enable_norm": true,
|
4259 |
+
"enable_perm": false,
|
4260 |
"group_num": 1,
|
4261 |
"group_size": 3584,
|
4262 |
"in_features": 3584,
|
|
|
4280 |
"model.layers.7.mlp.up_proj": {
|
4281 |
"bias": null,
|
4282 |
"enable_norm": true,
|
4283 |
+
"enable_perm": false,
|
4284 |
"group_num": 1,
|
4285 |
"group_size": 3584,
|
4286 |
"in_features": 3584,
|
|
|
4304 |
"model.layers.7.self_attn.k_proj": {
|
4305 |
"bias": true,
|
4306 |
"enable_norm": true,
|
4307 |
+
"enable_perm": false,
|
4308 |
"group_num": 1,
|
4309 |
"group_size": 3584,
|
4310 |
"in_features": 3584,
|
|
|
4328 |
"model.layers.7.self_attn.o_proj": {
|
4329 |
"bias": null,
|
4330 |
"enable_norm": true,
|
4331 |
+
"enable_perm": false,
|
4332 |
"group_num": 1,
|
4333 |
"group_size": 3584,
|
4334 |
"in_features": 3584,
|
|
|
4352 |
"model.layers.7.self_attn.q_proj": {
|
4353 |
"bias": true,
|
4354 |
"enable_norm": true,
|
4355 |
+
"enable_perm": false,
|
4356 |
"group_num": 1,
|
4357 |
"group_size": 3584,
|
4358 |
"in_features": 3584,
|
|
|
4376 |
"model.layers.7.self_attn.v_proj": {
|
4377 |
"bias": true,
|
4378 |
"enable_norm": true,
|
4379 |
+
"enable_perm": false,
|
4380 |
"group_num": 1,
|
4381 |
"group_size": 3584,
|
4382 |
"in_features": 3584,
|
|
|
4400 |
"model.layers.8.mlp.down_proj": {
|
4401 |
"bias": null,
|
4402 |
"enable_norm": true,
|
4403 |
+
"enable_perm": false,
|
4404 |
"group_num": 1,
|
4405 |
"group_size": 18944,
|
4406 |
"in_features": 18944,
|
|
|
4424 |
"model.layers.8.mlp.gate_proj": {
|
4425 |
"bias": null,
|
4426 |
"enable_norm": true,
|
4427 |
+
"enable_perm": false,
|
4428 |
"group_num": 1,
|
4429 |
"group_size": 3584,
|
4430 |
"in_features": 3584,
|
|
|
4448 |
"model.layers.8.mlp.up_proj": {
|
4449 |
"bias": null,
|
4450 |
"enable_norm": true,
|
4451 |
+
"enable_perm": false,
|
4452 |
"group_num": 1,
|
4453 |
"group_size": 3584,
|
4454 |
"in_features": 3584,
|
|
|
4472 |
"model.layers.8.self_attn.k_proj": {
|
4473 |
"bias": true,
|
4474 |
"enable_norm": true,
|
4475 |
+
"enable_perm": false,
|
4476 |
"group_num": 1,
|
4477 |
"group_size": 3584,
|
4478 |
"in_features": 3584,
|
|
|
4496 |
"model.layers.8.self_attn.o_proj": {
|
4497 |
"bias": null,
|
4498 |
"enable_norm": true,
|
4499 |
+
"enable_perm": false,
|
4500 |
"group_num": 1,
|
4501 |
"group_size": 3584,
|
4502 |
"in_features": 3584,
|
|
|
4520 |
"model.layers.8.self_attn.q_proj": {
|
4521 |
"bias": true,
|
4522 |
"enable_norm": true,
|
4523 |
+
"enable_perm": false,
|
4524 |
"group_num": 1,
|
4525 |
"group_size": 3584,
|
4526 |
"in_features": 3584,
|
|
|
4544 |
"model.layers.8.self_attn.v_proj": {
|
4545 |
"bias": true,
|
4546 |
"enable_norm": true,
|
4547 |
+
"enable_perm": false,
|
4548 |
"group_num": 1,
|
4549 |
"group_size": 3584,
|
4550 |
"in_features": 3584,
|
|
|
4568 |
"model.layers.9.mlp.down_proj": {
|
4569 |
"bias": null,
|
4570 |
"enable_norm": true,
|
4571 |
+
"enable_perm": false,
|
4572 |
"group_num": 1,
|
4573 |
"group_size": 18944,
|
4574 |
"in_features": 18944,
|
|
|
4592 |
"model.layers.9.mlp.gate_proj": {
|
4593 |
"bias": null,
|
4594 |
"enable_norm": true,
|
4595 |
+
"enable_perm": false,
|
4596 |
"group_num": 1,
|
4597 |
"group_size": 3584,
|
4598 |
"in_features": 3584,
|
|
|
4616 |
"model.layers.9.mlp.up_proj": {
|
4617 |
"bias": null,
|
4618 |
"enable_norm": true,
|
4619 |
+
"enable_perm": false,
|
4620 |
"group_num": 1,
|
4621 |
"group_size": 3584,
|
4622 |
"in_features": 3584,
|
|
|
4640 |
"model.layers.9.self_attn.k_proj": {
|
4641 |
"bias": true,
|
4642 |
"enable_norm": true,
|
4643 |
+
"enable_perm": false,
|
4644 |
"group_num": 1,
|
4645 |
"group_size": 3584,
|
4646 |
"in_features": 3584,
|
|
|
4664 |
"model.layers.9.self_attn.o_proj": {
|
4665 |
"bias": null,
|
4666 |
"enable_norm": true,
|
4667 |
+
"enable_perm": false,
|
4668 |
"group_num": 1,
|
4669 |
"group_size": 3584,
|
4670 |
"in_features": 3584,
|
|
|
4688 |
"model.layers.9.self_attn.q_proj": {
|
4689 |
"bias": true,
|
4690 |
"enable_norm": true,
|
4691 |
+
"enable_perm": false,
|
4692 |
"group_num": 1,
|
4693 |
"group_size": 3584,
|
4694 |
"in_features": 3584,
|
|
|
4712 |
"model.layers.9.self_attn.v_proj": {
|
4713 |
"bias": true,
|
4714 |
"enable_norm": true,
|
4715 |
+
"enable_perm": false,
|
4716 |
"group_num": 1,
|
4717 |
"group_size": 3584,
|
4718 |
"in_features": 3584,
|
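In short, this commit switches `enable_perm` to `false` on every quantized projection (`q/k/v/o_proj`, `gate/up/down_proj`) of every decoder layer. The field names (`enable_perm`, `enable_norm`, `group_num`, `group_size`) suggest a VPTQ-style vector-quantization config, where `enable_perm` presumably toggles a weight-permutation step; the config itself does not say, so that reading is an inference. Note also that the entries appear in lexicographic key order, which is why layer 2 sorts between layers 19 and 20. A change this repetitive is normally scripted rather than hand-edited; below is a minimal, hypothetical sketch of such a bulk edit. The filename and the recursive walk are assumptions for illustration, not the author's actual tooling.

```python
import json

def flip_enable_perm(node):
    """Set every "enable_perm" occurrence to False; return how many were touched."""
    flipped = 0
    if isinstance(node, dict):
        if "enable_perm" in node:
            node["enable_perm"] = False
            flipped += 1
        for value in node.values():
            flipped += flip_enable_perm(value)
    elif isinstance(node, list):
        for value in node:
            flipped += flip_enable_perm(value)
    return flipped

# Assumed filename: the config.json edited by this commit.
with open("config.json") as f:
    cfg = json.load(f)

count = flip_enable_perm(cfg)
print(f"enable_perm set to false on {count} entries")

# sort_keys=True reproduces the lexicographic key order visible in this diff.
with open("config.json", "w") as f:
    json.dump(cfg, f, indent=2, sort_keys=True)
```

Walking the whole tree instead of hard-coding a path to the per-layer table keeps the sketch independent of exactly where the quantization section nests inside the config; the printed count doubles as a sanity check that every layer entry was reached.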