|
layer,module,loss,damp,time
|
|
0,self_attn.k_proj,0.0000078315,0.01000,2.317
|
|
0,self_attn.v_proj,0.0000000732,0.01000,0.980
|
|
0,self_attn.q_proj,0.0000114605,0.01000,1.031
|
|
0,self_attn.o_proj,0.0000000054,0.01000,0.971
|
|
0,mlp.gate_proj,0.0000059708,0.01000,1.006
|
|
0,mlp.up_proj,0.0000049588,0.01000,0.997
|
|
0,mlp.down_proj,0.0000000220,0.01000,3.849
|
|
1,self_attn.k_proj,0.0000048128,0.01000,0.969
|
|
1,self_attn.v_proj,0.0000001860,0.01000,0.973
|
|
1,self_attn.q_proj,0.0000084727,0.01000,1.044
|
|
1,self_attn.o_proj,0.0000000151,0.01000,0.982
|
|
1,mlp.gate_proj,0.0000113100,0.01000,1.001
|
|
1,mlp.up_proj,0.0000095952,0.01000,1.003
|
|
1,mlp.down_proj,0.0000030217,0.01000,3.866
|
|
2,self_attn.k_proj,0.0000267097,0.01000,0.972
|
|
2,self_attn.v_proj,0.0000011418,0.01000,0.972
|
|
2,self_attn.q_proj,0.0000400319,0.01000,1.003
|
|
2,self_attn.o_proj,0.0000000189,0.01000,0.986
|
|
2,mlp.gate_proj,0.0000204956,0.01000,0.998
|
|
2,mlp.up_proj,0.0000164531,0.01000,1.005
|
|
2,mlp.down_proj,0.0000001085,0.01000,3.871
|
|
3,self_attn.k_proj,0.0000231683,0.01000,0.964
|
|
3,self_attn.v_proj,0.0000021031,0.01000,0.984
|
|
3,self_attn.q_proj,0.0000408762,0.01000,0.999
|
|
3,self_attn.o_proj,0.0000000440,0.01000,0.985
|
|
3,mlp.gate_proj,0.0000323238,0.01000,1.004
|
|
3,mlp.up_proj,0.0000226494,0.01000,1.018
|
|
3,mlp.down_proj,0.0000002050,0.01000,3.901
|
|
4,self_attn.k_proj,0.0000213612,0.01000,0.963
|
|
4,self_attn.v_proj,0.0000020058,0.01000,0.971
|
|
4,self_attn.q_proj,0.0000370368,0.01000,0.990
|
|
4,self_attn.o_proj,0.0000000814,0.01000,0.977
|
|
4,mlp.gate_proj,0.0000454558,0.01000,0.993
|
|
4,mlp.up_proj,0.0000279343,0.01000,0.995
|
|
4,mlp.down_proj,0.0000003546,0.01000,3.879
|
|
5,self_attn.k_proj,0.0000343131,0.01000,0.964
|
|
5,self_attn.v_proj,0.0000019262,0.01000,0.974
|
|
5,self_attn.q_proj,0.0000534258,0.01000,0.990
|
|
5,self_attn.o_proj,0.0000001064,0.01000,0.973
|
|
5,mlp.gate_proj,0.0000543252,0.01000,1.047
|
|
5,mlp.up_proj,0.0000338141,0.01000,0.992
|
|
5,mlp.down_proj,0.0000005027,0.01000,3.877
|
|
6,self_attn.k_proj,0.0000277318,0.01000,0.967
|
|
6,self_attn.v_proj,0.0000021740,0.01000,0.969
|
|
6,self_attn.q_proj,0.0000489093,0.01000,1.001
|
|
6,self_attn.o_proj,0.0000001838,0.01000,0.987
|
|
6,mlp.gate_proj,0.0000600048,0.01000,1.000
|
|
6,mlp.up_proj,0.0000369173,0.01000,1.005
|
|
6,mlp.down_proj,0.0000006183,0.01000,3.896
|
|
7,self_attn.k_proj,0.0000276680,0.01000,0.965
|
|
7,self_attn.v_proj,0.0000022922,0.01000,1.025
|
|
7,self_attn.q_proj,0.0000457158,0.01000,0.984
|
|
7,self_attn.o_proj,0.0000002600,0.01000,0.981
|
|
7,mlp.gate_proj,0.0000606045,0.01000,1.018
|
|
7,mlp.up_proj,0.0000397255,0.01000,0.989
|
|
7,mlp.down_proj,0.0000007190,0.01000,4.074
|
|
8,self_attn.k_proj,0.0000356829,0.01000,1.091
|
|
8,self_attn.v_proj,0.0000030032,0.01000,0.977
|
|
8,self_attn.q_proj,0.0000576325,0.01000,0.995
|
|
8,self_attn.o_proj,0.0000003418,0.01000,0.987
|
|
8,mlp.gate_proj,0.0000648441,0.01000,1.019
|
|
8,mlp.up_proj,0.0000420555,0.01000,1.080
|
|
8,mlp.down_proj,0.0000007795,0.01000,3.984
|
|
9,self_attn.k_proj,0.0000341801,0.01000,1.010
|
|
9,self_attn.v_proj,0.0000042894,0.01000,0.983
|
|
9,self_attn.q_proj,0.0000572199,0.01000,0.998
|
|
9,self_attn.o_proj,0.0000004127,0.01000,0.991
|
|
9,mlp.gate_proj,0.0000679240,0.01000,1.003
|
|
9,mlp.up_proj,0.0000437712,0.01000,1.009
|
|
9,mlp.down_proj,0.0000008496,0.01000,3.899
|
|
10,self_attn.k_proj,0.0000410061,0.01000,0.981
|
|
10,self_attn.v_proj,0.0000032449,0.01000,0.977
|
|
10,self_attn.q_proj,0.0000667809,0.01000,1.006
|
|
10,self_attn.o_proj,0.0000003859,0.01000,0.991
|
|
10,mlp.gate_proj,0.0000659345,0.01000,1.045
|
|
10,mlp.up_proj,0.0000457288,0.01000,1.000
|
|
10,mlp.down_proj,0.0000008926,0.01000,4.069
|
|
11,self_attn.k_proj,0.0000374975,0.01000,0.963
|
|
11,self_attn.v_proj,0.0000034291,0.01000,0.970
|
|
11,self_attn.q_proj,0.0000586573,0.01000,0.993
|
|
11,self_attn.o_proj,0.0000003994,0.01000,1.036
|
|
11,mlp.gate_proj,0.0000670856,0.01000,1.000
|
|
11,mlp.up_proj,0.0000481108,0.01000,1.004
|
|
11,mlp.down_proj,0.0000009515,0.01000,4.067
|
|
12,self_attn.k_proj,0.0000296477,0.01000,1.013
|
|
12,self_attn.v_proj,0.0000041030,0.01000,1.003
|
|
12,self_attn.q_proj,0.0000516405,0.01000,1.082
|
|
12,self_attn.o_proj,0.0000005252,0.01000,0.998
|
|
12,mlp.gate_proj,0.0000650923,0.01000,1.077
|
|
12,mlp.up_proj,0.0000492493,0.01000,1.001
|
|
12,mlp.down_proj,0.0000010790,0.01000,3.883
|
|
13,self_attn.k_proj,0.0000433963,0.01000,0.966
|
|
13,self_attn.v_proj,0.0000044248,0.01000,0.977
|
|
13,self_attn.q_proj,0.0000660645,0.01000,0.975
|
|
13,self_attn.o_proj,0.0000005850,0.01000,0.989
|
|
13,mlp.gate_proj,0.0000678395,0.01000,1.005
|
|
13,mlp.up_proj,0.0000513186,0.01000,1.007
|
|
13,mlp.down_proj,0.0000012084,0.01000,3.902
|
|
14,self_attn.k_proj,0.0000454082,0.01000,0.972
|
|
14,self_attn.v_proj,0.0000045558,0.01000,0.966
|
|
14,self_attn.q_proj,0.0000654248,0.01000,0.987
|
|
14,self_attn.o_proj,0.0000006629,0.01000,0.974
|
|
14,mlp.gate_proj,0.0000793303,0.01000,0.991
|
|
14,mlp.up_proj,0.0000566673,0.01000,0.998
|
|
14,mlp.down_proj,0.0000015292,0.01000,3.900
|
|
15,self_attn.k_proj,0.0000427130,0.01000,0.967
|
|
15,self_attn.v_proj,0.0000056919,0.01000,0.992
|
|
15,self_attn.q_proj,0.0000793933,0.01000,0.997
|
|
15,self_attn.o_proj,0.0000007707,0.01000,0.982
|
|
15,mlp.gate_proj,0.0000886215,0.01000,1.029
|
|
15,mlp.up_proj,0.0000600264,0.01000,1.019
|
|
15,mlp.down_proj,0.0000018517,0.01000,3.867
|
|
16,self_attn.k_proj,0.0000440876,0.01000,0.968
|
|
16,self_attn.v_proj,0.0000051383,0.01000,0.976
|
|
16,self_attn.q_proj,0.0000721971,0.01000,0.989
|
|
16,self_attn.o_proj,0.0000005920,0.01000,0.986
|
|
16,mlp.gate_proj,0.0001011934,0.01000,0.996
|
|
16,mlp.up_proj,0.0000647822,0.01000,1.001
|
|
16,mlp.down_proj,0.0000020315,0.01000,3.861
|
|
17,self_attn.k_proj,0.0000482271,0.01000,0.972
|
|
17,self_attn.v_proj,0.0000057105,0.01000,1.028
|
|
17,self_attn.q_proj,0.0000768861,0.01000,1.002
|
|
17,self_attn.o_proj,0.0000005416,0.01000,0.983
|
|
17,mlp.gate_proj,0.0001087115,0.01000,0.991
|
|
17,mlp.up_proj,0.0000680738,0.01000,1.000
|
|
17,mlp.down_proj,0.0000023693,0.01000,3.856
|
|
18,self_attn.k_proj,0.0000532660,0.01000,0.963
|
|
18,self_attn.v_proj,0.0000058475,0.01000,0.970
|
|
18,self_attn.q_proj,0.0000783183,0.01000,0.996
|
|
18,self_attn.o_proj,0.0000003645,0.01000,0.992
|
|
18,mlp.gate_proj,0.0001173563,0.01000,1.000
|
|
18,mlp.up_proj,0.0000727511,0.01000,1.002
|
|
18,mlp.down_proj,0.0000023655,0.01000,3.857
|
|
19,self_attn.k_proj,0.0000487429,0.01000,0.966
|
|
19,self_attn.v_proj,0.0000065425,0.01000,0.968
|
|
19,self_attn.q_proj,0.0000812822,0.01000,1.003
|
|
19,self_attn.o_proj,0.0000003422,0.01000,0.979
|
|
19,mlp.gate_proj,0.0001255447,0.01000,0.993
|
|
19,mlp.up_proj,0.0000765791,0.01000,0.997
|
|
19,mlp.down_proj,0.0000025128,0.01000,3.880
|
|
20,self_attn.k_proj,0.0000527441,0.01000,0.970
|
|
20,self_attn.v_proj,0.0000070160,0.01000,0.974
|
|
20,self_attn.q_proj,0.0000834531,0.01000,0.993
|
|
20,self_attn.o_proj,0.0000003798,0.01000,0.980
|
|
20,mlp.gate_proj,0.0001346347,0.01000,1.006
|
|
20,mlp.up_proj,0.0000827701,0.01000,1.003
|
|
20,mlp.down_proj,0.0000027525,0.01000,4.004
|
|
21,self_attn.k_proj,0.0000521419,0.01000,0.990
|
|
21,self_attn.v_proj,0.0000077289,0.01000,0.970
|
|
21,self_attn.q_proj,0.0000801831,0.01000,1.006
|
|
21,self_attn.o_proj,0.0000005379,0.01000,0.985
|
|
21,mlp.gate_proj,0.0001441817,0.01000,0.996
|
|
21,mlp.up_proj,0.0000879591,0.01000,1.001
|
|
21,mlp.down_proj,0.0000031674,0.01000,3.876
|
|
22,self_attn.k_proj,0.0000547591,0.01000,0.965
|
|
22,self_attn.v_proj,0.0000089226,0.01000,0.968
|
|
22,self_attn.q_proj,0.0000809640,0.01000,0.996
|
|
22,self_attn.o_proj,0.0000004595,0.01000,0.984
|
|
22,mlp.gate_proj,0.0001504454,0.01000,0.990
|
|
22,mlp.up_proj,0.0000926191,0.01000,0.992
|
|
22,mlp.down_proj,0.0000032868,0.01000,3.852
|
|
23,self_attn.k_proj,0.0000534549,0.01000,0.970
|
|
23,self_attn.v_proj,0.0000098804,0.01000,0.969
|
|
23,self_attn.q_proj,0.0000849904,0.01000,0.994
|
|
23,self_attn.o_proj,0.0000004688,0.01000,0.987
|
|
23,mlp.gate_proj,0.0001580264,0.01000,1.006
|
|
23,mlp.up_proj,0.0000978091,0.01000,0.999
|
|
23,mlp.down_proj,0.0000034938,0.01000,3.878
|
|
24,self_attn.k_proj,0.0000527611,0.01000,0.965
|
|
24,self_attn.v_proj,0.0000122944,0.01000,1.001
|
|
24,self_attn.q_proj,0.0000851840,0.01000,0.987
|
|
24,self_attn.o_proj,0.0000004715,0.01000,0.980
|
|
24,mlp.gate_proj,0.0001690636,0.01000,0.989
|
|
24,mlp.up_proj,0.0001046552,0.01000,0.994
|
|
24,mlp.down_proj,0.0000037665,0.01000,3.855
|
|
25,self_attn.k_proj,0.0000501195,0.01000,0.966
|
|
25,self_attn.v_proj,0.0000129700,0.01000,0.971
|
|
25,self_attn.q_proj,0.0000855239,0.01000,0.991
|
|
25,self_attn.o_proj,0.0000005515,0.01000,0.980
|
|
25,mlp.gate_proj,0.0001802457,0.01000,0.993
|
|
25,mlp.up_proj,0.0001116258,0.01000,1.001
|
|
25,mlp.down_proj,0.0000041575,0.01000,3.899
|
|
26,self_attn.k_proj,0.0000526600,0.01000,0.965
|
|
26,self_attn.v_proj,0.0000119880,0.01000,0.968
|
|
26,self_attn.q_proj,0.0000825395,0.01000,0.988
|
|
26,self_attn.o_proj,0.0000007455,0.01000,1.015
|
|
26,mlp.gate_proj,0.0001940883,0.01000,1.019
|
|
26,mlp.up_proj,0.0001201080,0.01000,1.047
|
|
26,mlp.down_proj,0.0000047189,0.01000,3.845
|
|
27,self_attn.k_proj,0.0000578712,0.01000,0.967
|
|
27,self_attn.v_proj,0.0000172764,0.01000,0.979
|
|
27,self_attn.q_proj,0.0000868170,0.01000,0.997
|
|
27,self_attn.o_proj,0.0000009332,0.01000,0.984
|
|
27,mlp.gate_proj,0.0002109922,0.01000,0.998
|
|
27,mlp.up_proj,0.0001314406,0.01000,1.003
|
|
27,mlp.down_proj,0.0000056249,0.01000,3.926
|
|
28,self_attn.k_proj,0.0000460739,0.01000,0.970
|
|
28,self_attn.v_proj,0.0000153502,0.01000,1.019
|
|
28,self_attn.q_proj,0.0000809999,0.01000,1.003
|
|
28,self_attn.o_proj,0.0000017104,0.01000,0.984
|
|
28,mlp.gate_proj,0.0002234578,0.01000,1.007
|
|
28,mlp.up_proj,0.0001457902,0.01000,1.020
|
|
28,mlp.down_proj,0.0000072474,0.01000,3.887
|
|
29,self_attn.k_proj,0.0000484581,0.01000,0.967
|
|
29,self_attn.v_proj,0.0000186207,0.01000,0.973
|
|
29,self_attn.q_proj,0.0000856461,0.01000,1.019
|
|
29,self_attn.o_proj,0.0000013237,0.01000,1.015
|
|
29,mlp.gate_proj,0.0002276506,0.01000,1.030
|
|
29,mlp.up_proj,0.0001557216,0.01000,1.021
|
|
29,mlp.down_proj,0.0000098171,0.01000,3.893
|
|
30,self_attn.k_proj,0.0000472124,0.01000,1.033
|
|
30,self_attn.v_proj,0.0000248850,0.01000,0.978
|
|
30,self_attn.q_proj,0.0000780871,0.01000,1.005
|
|
30,self_attn.o_proj,0.0000025531,0.01000,0.994
|
|
30,mlp.gate_proj,0.0002439993,0.01000,1.013
|
|
30,mlp.up_proj,0.0001630218,0.01000,1.013
|
|
30,mlp.down_proj,0.0000155139,0.01000,3.999
|
|
31,self_attn.k_proj,0.0000352841,0.01000,0.978
|
|
31,self_attn.v_proj,0.0000151283,0.01000,0.981
|
|
31,self_attn.q_proj,0.0000682465,0.01000,1.007
|
|
31,self_attn.o_proj,0.0000048641,0.01000,0.990
|
|
31,mlp.gate_proj,0.0002152404,0.01000,1.000
|
|
31,mlp.up_proj,0.0001472785,0.01000,1.001
|
|
31,mlp.down_proj,0.0000383563,0.01000,3.860
|
|
|