|
layer,module,loss,samples,damp,time
|
|
0,self_attn.k_proj,0.0000000015,0.01000,1.712
|
|
0,self_attn.v_proj,0.0000000012,0.01000,0.925
|
|
0,self_attn.q_proj,0.0000000049,0.01000,0.904
|
|
0,self_attn.o_proj,0.0000000163,0.01000,0.882
|
|
0,mlp.up_proj,0.0000005740,0.01000,0.911
|
|
0,mlp.gate_proj,0.0000006532,0.01000,0.902
|
|
0,mlp.down_proj,0.0000001969,0.01000,3.010
|
|
1,self_attn.k_proj,0.0000000043,0.01000,0.906
|
|
1,self_attn.v_proj,0.0000000045,0.01000,0.888
|
|
1,self_attn.q_proj,0.0000000146,0.01000,0.913
|
|
1,self_attn.o_proj,0.0000000275,0.01000,0.908
|
|
1,mlp.up_proj,0.0000052082,0.01000,0.939
|
|
1,mlp.gate_proj,0.0000131845,0.01000,0.924
|
|
1,mlp.down_proj,0.0000001877,0.01000,3.054
|
|
2,self_attn.k_proj,0.0000000112,0.01000,0.878
|
|
2,self_attn.v_proj,0.0000000108,0.01000,0.871
|
|
2,self_attn.q_proj,0.0000000380,0.01000,0.874
|
|
2,self_attn.o_proj,0.0000000328,0.01000,0.881
|
|
2,mlp.up_proj,0.0000202890,0.01000,0.915
|
|
2,mlp.gate_proj,0.0000263211,0.01000,0.912
|
|
2,mlp.down_proj,0.0000001653,0.01000,3.074
|
|
3,self_attn.k_proj,0.0000000193,0.01000,0.885
|
|
3,self_attn.v_proj,0.0000000196,0.01000,0.870
|
|
3,self_attn.q_proj,0.0000000692,0.01000,0.886
|
|
3,self_attn.o_proj,0.0000000536,0.01000,0.885
|
|
3,mlp.up_proj,0.0000122846,0.01000,0.917
|
|
3,mlp.gate_proj,0.0000268380,0.01000,0.910
|
|
3,mlp.down_proj,0.0000004506,0.01000,3.076
|
|
4,self_attn.k_proj,0.0000000434,0.01000,0.890
|
|
4,self_attn.v_proj,0.0000000442,0.01000,0.867
|
|
4,self_attn.q_proj,0.0000001510,0.01000,0.881
|
|
4,self_attn.o_proj,0.0000000948,0.01000,0.883
|
|
4,mlp.up_proj,0.0000121759,0.01000,0.916
|
|
4,mlp.gate_proj,0.0000322729,0.01000,0.909
|
|
4,mlp.down_proj,0.0000008796,0.01000,3.055
|
|
5,self_attn.k_proj,0.0000000433,0.01000,0.896
|
|
5,self_attn.v_proj,0.0000000457,0.01000,0.875
|
|
5,self_attn.q_proj,0.0000001578,0.01000,0.884
|
|
5,self_attn.o_proj,0.0000001365,0.01000,0.903
|
|
5,mlp.up_proj,0.0000080894,0.01000,0.927
|
|
5,mlp.gate_proj,0.0000179048,0.01000,0.915
|
|
5,mlp.down_proj,0.0000013001,0.01000,3.052
|
|
6,self_attn.k_proj,0.0000000803,0.01000,0.905
|
|
6,self_attn.v_proj,0.0000000884,0.01000,0.899
|
|
6,self_attn.q_proj,0.0000003184,0.01000,0.897
|
|
6,self_attn.o_proj,0.0000003676,0.01000,0.900
|
|
6,mlp.up_proj,0.0000104167,0.01000,0.969
|
|
6,mlp.gate_proj,0.0000199433,0.01000,0.937
|
|
6,mlp.down_proj,0.0001922736,0.01000,3.135
|
|
7,self_attn.k_proj,0.0000001980,0.01000,0.901
|
|
7,self_attn.v_proj,0.0000001892,0.01000,0.869
|
|
7,self_attn.q_proj,0.0000006958,0.01000,0.908
|
|
7,self_attn.o_proj,0.0000003350,0.01000,0.886
|
|
7,mlp.up_proj,0.0000127284,0.01000,0.918
|
|
7,mlp.gate_proj,0.0000226174,0.01000,0.961
|
|
7,mlp.down_proj,0.0000018001,0.01000,3.109
|
|
8,self_attn.k_proj,0.0000003166,0.01000,0.902
|
|
8,self_attn.v_proj,0.0000003271,0.01000,0.889
|
|
8,self_attn.q_proj,0.0000011330,0.01000,0.884
|
|
8,self_attn.o_proj,0.0000005447,0.01000,0.886
|
|
8,mlp.up_proj,0.0000123870,0.01000,0.920
|
|
8,mlp.gate_proj,0.0000142148,0.01000,0.908
|
|
8,mlp.down_proj,0.0000031396,0.01000,3.126
|
|
9,self_attn.k_proj,0.0000004336,0.01000,0.904
|
|
9,self_attn.v_proj,0.0000003877,0.01000,0.886
|
|
9,self_attn.q_proj,0.0000014913,0.01000,0.897
|
|
9,self_attn.o_proj,0.0000006713,0.01000,0.904
|
|
9,mlp.up_proj,0.0000159213,0.01000,0.936
|
|
9,mlp.gate_proj,0.0000215900,0.01000,0.928
|
|
9,mlp.down_proj,0.0000036478,0.01000,3.164
|
|
10,self_attn.k_proj,0.0000007654,0.01000,0.947
|
|
10,self_attn.v_proj,0.0000008362,0.01000,0.891
|
|
10,self_attn.q_proj,0.0000027566,0.01000,0.931
|
|
10,self_attn.o_proj,0.0000010241,0.01000,0.932
|
|
10,mlp.up_proj,0.0000156613,0.01000,0.940
|
|
10,mlp.gate_proj,0.0000200901,0.01000,0.928
|
|
10,mlp.down_proj,0.0000037414,0.01000,3.141
|
|
11,self_attn.k_proj,0.0000003988,0.01000,0.888
|
|
11,self_attn.v_proj,0.0000003882,0.01000,0.876
|
|
11,self_attn.q_proj,0.0000013864,0.01000,0.878
|
|
11,self_attn.o_proj,0.0000008979,0.01000,0.894
|
|
11,mlp.up_proj,0.0000157748,0.01000,0.921
|
|
11,mlp.gate_proj,0.0000185915,0.01000,0.910
|
|
11,mlp.down_proj,0.0000037746,0.01000,3.063
|
|
12,self_attn.k_proj,0.0000004513,0.01000,0.886
|
|
12,self_attn.v_proj,0.0000004669,0.01000,0.875
|
|
12,self_attn.q_proj,0.0000015881,0.01000,0.911
|
|
12,self_attn.o_proj,0.0000010671,0.01000,0.884
|
|
12,mlp.up_proj,0.0000165085,0.01000,0.920
|
|
12,mlp.gate_proj,0.0000181187,0.01000,0.920
|
|
12,mlp.down_proj,0.0000039800,0.01000,3.168
|
|
13,self_attn.k_proj,0.0000003635,0.01000,0.885
|
|
13,self_attn.v_proj,0.0000003458,0.01000,0.869
|
|
13,self_attn.q_proj,0.0000013292,0.01000,0.887
|
|
13,self_attn.o_proj,0.0000007286,0.01000,0.885
|
|
13,mlp.up_proj,0.0000171527,0.01000,0.919
|
|
13,mlp.gate_proj,0.0000174607,0.01000,0.912
|
|
13,mlp.down_proj,0.0000045170,0.01000,2.989
|
|
14,self_attn.k_proj,0.0000006080,0.01000,0.885
|
|
14,self_attn.v_proj,0.0000006287,0.01000,0.869
|
|
14,self_attn.q_proj,0.0000022256,0.01000,0.874
|
|
14,self_attn.o_proj,0.0000014882,0.01000,0.883
|
|
14,mlp.up_proj,0.0000173904,0.01000,0.955
|
|
14,mlp.gate_proj,0.0000175352,0.01000,0.913
|
|
14,mlp.down_proj,0.0000043650,0.01000,3.159
|
|
15,self_attn.k_proj,0.0000006405,0.01000,0.895
|
|
15,self_attn.v_proj,0.0000006059,0.01000,0.872
|
|
15,self_attn.q_proj,0.0000022680,0.01000,0.889
|
|
15,self_attn.o_proj,0.0000010979,0.01000,0.885
|
|
15,mlp.up_proj,0.0000170070,0.01000,0.919
|
|
15,mlp.gate_proj,0.0000164598,0.01000,0.914
|
|
15,mlp.down_proj,0.0000044099,0.01000,3.104
|
|
16,self_attn.k_proj,0.0000010972,0.01000,0.891
|
|
16,self_attn.v_proj,0.0000012423,0.01000,0.879
|
|
16,self_attn.q_proj,0.0000042154,0.01000,0.882
|
|
16,self_attn.o_proj,0.0000014473,0.01000,0.901
|
|
16,mlp.up_proj,0.0000186370,0.01000,0.926
|
|
16,mlp.gate_proj,0.0000186707,0.01000,0.912
|
|
16,mlp.down_proj,0.0000095448,0.01000,3.062
|
|
17,self_attn.k_proj,0.0000010145,0.01000,0.890
|
|
17,self_attn.v_proj,0.0000011109,0.01000,0.885
|
|
17,self_attn.q_proj,0.0000040763,0.01000,0.883
|
|
17,self_attn.o_proj,0.0000016497,0.01000,0.920
|
|
17,mlp.up_proj,0.0000180247,0.01000,0.921
|
|
17,mlp.gate_proj,0.0000175535,0.01000,0.915
|
|
17,mlp.down_proj,0.0000056334,0.01000,3.068
|
|
18,self_attn.k_proj,0.0000011954,0.01000,0.886
|
|
18,self_attn.v_proj,0.0000012692,0.01000,0.873
|
|
18,self_attn.q_proj,0.0000045311,0.01000,0.887
|
|
18,self_attn.o_proj,0.0000019605,0.01000,0.890
|
|
18,mlp.up_proj,0.0000205253,0.01000,0.920
|
|
18,mlp.gate_proj,0.0000200017,0.01000,0.919
|
|
18,mlp.down_proj,0.0000065850,0.01000,3.093
|
|
19,self_attn.k_proj,0.0000023365,0.01000,0.903
|
|
19,self_attn.v_proj,0.0000024214,0.01000,0.891
|
|
19,self_attn.q_proj,0.0000092070,0.01000,0.905
|
|
19,self_attn.o_proj,0.0000029968,0.01000,0.906
|
|
19,mlp.up_proj,0.0000228412,0.01000,0.938
|
|
19,mlp.gate_proj,0.0000218078,0.01000,0.930
|
|
19,mlp.down_proj,0.0000084889,0.01000,3.115
|
|
20,self_attn.k_proj,0.0000020128,0.01000,0.905
|
|
20,self_attn.v_proj,0.0000022445,0.01000,0.890
|
|
20,self_attn.q_proj,0.0000085190,0.01000,0.901
|
|
20,self_attn.o_proj,0.0000028852,0.01000,0.905
|
|
20,mlp.up_proj,0.0000252495,0.01000,0.918
|
|
20,mlp.gate_proj,0.0000236520,0.01000,0.908
|
|
20,mlp.down_proj,0.0000100402,0.01000,3.059
|
|
21,self_attn.k_proj,0.0000027516,0.01000,0.893
|
|
21,self_attn.v_proj,0.0000032758,0.01000,0.875
|
|
21,self_attn.q_proj,0.0000112753,0.01000,0.885
|
|
21,self_attn.o_proj,0.0000033707,0.01000,0.911
|
|
21,mlp.up_proj,0.0000297586,0.01000,0.940
|
|
21,mlp.gate_proj,0.0000277278,0.01000,0.935
|
|
21,mlp.down_proj,0.0000132390,0.01000,3.054
|
|
22,self_attn.k_proj,0.0000062124,0.01000,0.899
|
|
22,self_attn.v_proj,0.0000065444,0.01000,0.876
|
|
22,self_attn.q_proj,0.0000241311,0.01000,0.885
|
|
22,self_attn.o_proj,0.0000067822,0.01000,0.898
|
|
22,mlp.up_proj,0.0000342028,0.01000,0.931
|
|
22,mlp.gate_proj,0.0000322704,0.01000,0.915
|
|
22,mlp.down_proj,0.0000216577,0.01000,3.156
|
|
23,self_attn.k_proj,0.0000061361,0.01000,0.893
|
|
23,self_attn.v_proj,0.0000074565,0.01000,0.877
|
|
23,self_attn.q_proj,0.0000253022,0.01000,0.890
|
|
23,self_attn.o_proj,0.0000105262,0.01000,0.907
|
|
23,mlp.up_proj,0.0000393763,0.01000,0.937
|
|
23,mlp.gate_proj,0.0000381544,0.01000,0.920
|
|
23,mlp.down_proj,0.0000307144,0.01000,3.058
|
|
24,self_attn.k_proj,0.0000111583,0.01000,0.886
|
|
24,self_attn.v_proj,0.0000144551,0.01000,0.882
|
|
24,self_attn.q_proj,0.0000454749,0.01000,0.881
|
|
24,self_attn.o_proj,0.0000115710,0.01000,0.888
|
|
24,mlp.up_proj,0.0000445548,0.01000,0.925
|
|
24,mlp.gate_proj,0.0000436983,0.01000,0.921
|
|
24,mlp.down_proj,0.0000383308,0.01000,3.064
|
|
25,self_attn.k_proj,0.0000089802,0.01000,0.929
|
|
25,self_attn.v_proj,0.0000106912,0.01000,0.883
|
|
25,self_attn.q_proj,0.0000350779,0.01000,0.883
|
|
25,self_attn.o_proj,0.0000077190,0.01000,0.895
|
|
25,mlp.up_proj,0.0000516038,0.01000,0.919
|
|
25,mlp.gate_proj,0.0000512468,0.01000,0.922
|
|
25,mlp.down_proj,0.0000478184,0.01000,3.053
|
|
26,self_attn.k_proj,0.0000148602,0.01000,0.888
|
|
26,self_attn.v_proj,0.0000182355,0.01000,0.872
|
|
26,self_attn.q_proj,0.0000605304,0.01000,0.891
|
|
26,self_attn.o_proj,0.0000092113,0.01000,0.893
|
|
26,mlp.up_proj,0.0000635605,0.01000,0.922
|
|
26,mlp.gate_proj,0.0000621471,0.01000,0.953
|
|
26,mlp.down_proj,0.0000573816,0.01000,3.140
|
|
27,self_attn.k_proj,0.0000167369,0.01000,0.894
|
|
27,self_attn.v_proj,0.0000221709,0.01000,0.875
|
|
27,self_attn.q_proj,0.0000698549,0.01000,0.890
|
|
27,self_attn.o_proj,0.0000105676,0.01000,0.896
|
|
27,mlp.up_proj,0.0000737238,0.01000,0.923
|
|
27,mlp.gate_proj,0.0000708008,0.01000,0.917
|
|
27,mlp.down_proj,0.0000797470,0.01000,3.123
|
|
28,self_attn.k_proj,0.0000194154,0.01000,0.890
|
|
28,self_attn.v_proj,0.0000222215,0.01000,0.879
|
|
28,self_attn.q_proj,0.0000736696,0.01000,0.886
|
|
28,self_attn.o_proj,0.0000162227,0.01000,0.888
|
|
28,mlp.up_proj,0.0000866654,0.01000,0.921
|
|
28,mlp.gate_proj,0.0000811015,0.01000,0.913
|
|
28,mlp.down_proj,0.0001118279,0.01000,3.063
|
|
29,self_attn.k_proj,0.0000441453,0.01000,0.904
|
|
29,self_attn.v_proj,0.0000646906,0.01000,0.876
|
|
29,self_attn.q_proj,0.0001756297,0.01000,0.882
|
|
29,self_attn.o_proj,0.0000162280,0.01000,0.892
|
|
29,mlp.up_proj,0.0001036480,0.01000,0.931
|
|
29,mlp.gate_proj,0.0000942627,0.01000,0.924
|
|
29,mlp.down_proj,0.0001289722,0.01000,3.069
|
|
30,self_attn.k_proj,0.0000534550,0.01000,0.894
|
|
30,self_attn.v_proj,0.0000700705,0.01000,0.872
|
|
30,self_attn.q_proj,0.0002013896,0.01000,0.881
|
|
30,self_attn.o_proj,0.0000261334,0.01000,0.895
|
|
30,mlp.up_proj,0.0001134598,0.01000,0.931
|
|
30,mlp.gate_proj,0.0001007077,0.01000,0.915
|
|
30,mlp.down_proj,0.0001700256,0.01000,3.058
|
|
31,self_attn.k_proj,0.0000754044,0.01000,0.894
|
|
31,self_attn.v_proj,0.0001038457,0.01000,0.885
|
|
31,self_attn.q_proj,0.0002635791,0.01000,0.893
|
|
31,self_attn.o_proj,0.0000305994,0.01000,0.901
|
|
31,mlp.up_proj,0.0001222418,0.01000,0.931
|
|
31,mlp.gate_proj,0.0001049834,0.01000,0.918
|
|
31,mlp.down_proj,0.0002177585,0.01000,3.056
|
|
32,self_attn.k_proj,0.0001011146,0.01000,0.890
|
|
32,self_attn.v_proj,0.0001517884,0.01000,0.882
|
|
32,self_attn.q_proj,0.0003828967,0.01000,0.891
|
|
32,self_attn.o_proj,0.0000431652,0.01000,0.904
|
|
32,mlp.up_proj,0.0001302504,0.01000,0.928
|
|
32,mlp.gate_proj,0.0001096425,0.01000,0.925
|
|
32,mlp.down_proj,0.0002804651,0.01000,3.047
|
|
33,self_attn.k_proj,0.0001741775,0.01000,0.892
|
|
33,self_attn.v_proj,0.0003288537,0.01000,0.878
|
|
33,self_attn.q_proj,0.0007499020,0.01000,0.897
|
|
33,self_attn.o_proj,0.0000649437,0.01000,0.896
|
|
33,mlp.up_proj,0.0001392706,0.01000,0.924
|
|
33,mlp.gate_proj,0.0001153356,0.01000,0.922
|
|
33,mlp.down_proj,0.0003548277,0.01000,3.062
|
|
34,self_attn.k_proj,0.0001468701,0.01000,0.890
|
|
34,self_attn.v_proj,0.0002285834,0.01000,0.875
|
|
34,self_attn.q_proj,0.0006159075,0.01000,0.894
|
|
34,self_attn.o_proj,0.0001155951,0.01000,0.894
|
|
34,mlp.up_proj,0.0001467356,0.01000,0.921
|
|
34,mlp.gate_proj,0.0001274698,0.01000,0.916
|
|
34,mlp.down_proj,0.0004528920,0.01000,3.020
|
|
35,self_attn.k_proj,0.0000713502,0.01000,0.897
|
|
35,self_attn.v_proj,0.0000922083,0.01000,0.875
|
|
35,self_attn.q_proj,0.0002660156,0.01000,0.889
|
|
35,self_attn.o_proj,0.0000814533,0.01000,0.904
|
|
35,mlp.up_proj,0.0001837838,0.01000,0.938
|
|
35,mlp.gate_proj,0.0001648423,0.01000,0.911
|
|
35,mlp.down_proj,0.0008856815,0.01000,3.072
|
|
|