fnlp
/

Hzfinfdu commited on
Commit
c14c1dc
·
verified ·
1 Parent(s): d6a0620

Upload folder using huggingface_hub

Browse files
L2A/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 256,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 64,
5
+ "n_ov_heads": 32768,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 1024,
11
+ "layer": 2,
12
+ "model_name": "meta-llama/Llama-3.1-8B",
13
+ "mode": "top_k",
14
+ "top_k": 128,
15
+ "avg_norm": {
16
+ "in": 25.82196044921875,
17
+ "out": 0.8914777040481567
18
+ },
19
+ "d_model": 4096,
20
+ "attn_scale": 11.313708498984761,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 256,
24
+ "rotary_base": 500000.0,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": true,
27
+ "NTK_by_parts_low_freq_factor": 1.0,
28
+ "NTK_by_parts_high_freq_factor": 4.0,
29
+ "NTK_by_parts_factor": 8.0,
30
+ "old_context_len": 8192
31
+ }
L2A/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:292697deb870ed9e8ea3ff5db00f022f1181965d97a0db9a6b355465d5505413
3
+ size 1614040466
L2A/metrics.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 1.25998e-05,
3
+ "explained_variance": 0.88186,
4
+ "l1": 4.18,
5
+ "ground_truth_norm": 0.889,
6
+ "reconstructed_norm": 0.862,
7
+ "error_norm": 0.2141,
8
+ "sparsity/below 1e-5": 16279,
9
+ "sparsity/below 1e-6": 15533,
10
+ "positivity": 0.54269,
11
+ "ov_head_live_count": {
12
+ "0": 312,
13
+ "1": 338,
14
+ "2": 186,
15
+ "3": 141,
16
+ "4": 450,
17
+ "5": 439,
18
+ "6": 173,
19
+ "7": 377,
20
+ "8": 167,
21
+ "9": 128,
22
+ "10": 332,
23
+ "11": 349,
24
+ "12": 250,
25
+ "13": 171,
26
+ "14": 422,
27
+ "15": 303,
28
+ "16": 429,
29
+ "17": 188,
30
+ "18": 65,
31
+ "19": 360,
32
+ "20": 321,
33
+ "21": 215,
34
+ "22": 432,
35
+ "23": 426,
36
+ "24": 285,
37
+ "25": 167,
38
+ "26": 345,
39
+ "27": 167,
40
+ "28": 350,
41
+ "29": 461,
42
+ "30": 262,
43
+ "31": 271,
44
+ "32": 79,
45
+ "33": 134,
46
+ "34": 179,
47
+ "35": 356,
48
+ "36": 329,
49
+ "37": 433,
50
+ "38": 269,
51
+ "39": 84,
52
+ "40": 243,
53
+ "41": 443,
54
+ "42": 348,
55
+ "43": 196,
56
+ "44": 169,
57
+ "45": 333,
58
+ "46": 291,
59
+ "47": 171,
60
+ "48": 310,
61
+ "49": 291,
62
+ "50": 375,
63
+ "51": 327,
64
+ "52": 305,
65
+ "53": 92,
66
+ "54": 262,
67
+ "55": 207,
68
+ "56": 236,
69
+ "57": 328,
70
+ "58": 165,
71
+ "59": 395,
72
+ "60": 211,
73
+ "61": 451,
74
+ "62": 68,
75
+ "63": 421
76
+ }
77
+ }
L3A/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 256,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 64,
5
+ "n_ov_heads": 32768,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 1024,
11
+ "layer": 3,
12
+ "model_name": "meta-llama/Llama-3.1-8B",
13
+ "mode": "top_k",
14
+ "top_k": 128,
15
+ "avg_norm": {
16
+ "in": 22.983654022216797,
17
+ "out": 1.1687864065170288
18
+ },
19
+ "d_model": 4096,
20
+ "attn_scale": 11.313708498984761,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 256,
24
+ "rotary_base": 500000.0,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": true,
27
+ "NTK_by_parts_low_freq_factor": 1.0,
28
+ "NTK_by_parts_high_freq_factor": 4.0,
29
+ "NTK_by_parts_factor": 8.0,
30
+ "old_context_len": 8192
31
+ }
L3A/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96f03145dca4a61a1f544ef7ce14602c6492bd6c6c2350c784b4b0e70451446e
3
+ size 1614040466
L3A/metrics.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 4.04951e-05,
3
+ "explained_variance": 0.82606,
4
+ "l1": 5.99,
5
+ "ground_truth_norm": 1.16,
6
+ "reconstructed_norm": 1.089,
7
+ "error_norm": 0.3918,
8
+ "sparsity/below 1e-5": 15990,
9
+ "sparsity/below 1e-6": 15516,
10
+ "positivity": 0.54037,
11
+ "ov_head_live_count": {
12
+ "0": 290,
13
+ "1": 176,
14
+ "2": 267,
15
+ "3": 238,
16
+ "4": 334,
17
+ "5": 57,
18
+ "6": 472,
19
+ "7": 370,
20
+ "8": 150,
21
+ "9": 158,
22
+ "10": 340,
23
+ "11": 212,
24
+ "12": 254,
25
+ "13": 425,
26
+ "14": 233,
27
+ "15": 418,
28
+ "16": 465,
29
+ "17": 396,
30
+ "18": 458,
31
+ "19": 64,
32
+ "20": 346,
33
+ "21": 162,
34
+ "22": 166,
35
+ "23": 367,
36
+ "24": 270,
37
+ "25": 273,
38
+ "26": 185,
39
+ "27": 236,
40
+ "28": 230,
41
+ "29": 343,
42
+ "30": 410,
43
+ "31": 238,
44
+ "32": 207,
45
+ "33": 26,
46
+ "34": 129,
47
+ "35": 148,
48
+ "36": 104,
49
+ "37": 305,
50
+ "38": 478,
51
+ "39": 364,
52
+ "40": 177,
53
+ "41": 434,
54
+ "42": 360,
55
+ "43": 491,
56
+ "44": 162,
57
+ "45": 342,
58
+ "46": 299,
59
+ "47": 363,
60
+ "48": 60,
61
+ "49": 254,
62
+ "50": 299,
63
+ "51": 243,
64
+ "52": 379,
65
+ "53": 91,
66
+ "54": 364,
67
+ "55": 449,
68
+ "56": 412,
69
+ "57": 112,
70
+ "58": 265,
71
+ "59": 188,
72
+ "60": 55,
73
+ "61": 342,
74
+ "62": 311,
75
+ "63": 491
76
+ }
77
+ }
L4A/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 256,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 64,
5
+ "n_ov_heads": 32768,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 1024,
11
+ "layer": 4,
12
+ "model_name": "meta-llama/Llama-3.1-8B",
13
+ "mode": "top_k",
14
+ "top_k": 128,
15
+ "avg_norm": {
16
+ "in": 22.783418655395508,
17
+ "out": 1.4010306596755981
18
+ },
19
+ "d_model": 4096,
20
+ "attn_scale": 11.313708498984761,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 256,
24
+ "rotary_base": 500000.0,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": true,
27
+ "NTK_by_parts_low_freq_factor": 1.0,
28
+ "NTK_by_parts_high_freq_factor": 4.0,
29
+ "NTK_by_parts_factor": 8.0,
30
+ "old_context_len": 8192
31
+ }
L4A/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f02d1943cbe67836523652be7175806ef2af2a3765bb3940864e6c58162c778
3
+ size 1614040466
L4A/metrics.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 7.74775e-05,
3
+ "explained_variance": 0.80203,
4
+ "l1": 7.87,
5
+ "ground_truth_norm": 1.413,
6
+ "reconstructed_norm": 1.301,
7
+ "error_norm": 0.5429,
8
+ "sparsity/below 1e-5": 18830,
9
+ "sparsity/below 1e-6": 18220,
10
+ "positivity": 0.45062,
11
+ "ov_head_live_count": {
12
+ "0": 327,
13
+ "1": 150,
14
+ "2": 366,
15
+ "3": 310,
16
+ "4": 107,
17
+ "5": 407,
18
+ "6": 80,
19
+ "7": 52,
20
+ "8": 210,
21
+ "9": 213,
22
+ "10": 228,
23
+ "11": 212,
24
+ "12": 214,
25
+ "13": 360,
26
+ "14": 273,
27
+ "15": 202,
28
+ "16": 174,
29
+ "17": 273,
30
+ "18": 45,
31
+ "19": 165,
32
+ "20": 194,
33
+ "21": 173,
34
+ "22": 177,
35
+ "23": 238,
36
+ "24": 383,
37
+ "25": 198,
38
+ "26": 193,
39
+ "27": 415,
40
+ "28": 297,
41
+ "29": 312,
42
+ "30": 251,
43
+ "31": 430,
44
+ "32": 184,
45
+ "33": 38,
46
+ "34": 152,
47
+ "35": 150,
48
+ "36": 224,
49
+ "37": 303,
50
+ "38": 221,
51
+ "39": 195,
52
+ "40": 148,
53
+ "41": 128,
54
+ "42": 71,
55
+ "43": 433,
56
+ "44": 277,
57
+ "45": 204,
58
+ "46": 249,
59
+ "47": 100,
60
+ "48": 126,
61
+ "49": 305,
62
+ "50": 243,
63
+ "51": 270,
64
+ "52": 236,
65
+ "53": 387,
66
+ "54": 272,
67
+ "55": 180,
68
+ "56": 161,
69
+ "57": 233,
70
+ "58": 341,
71
+ "59": 331,
72
+ "60": 264,
73
+ "61": 117,
74
+ "62": 292,
75
+ "63": 302
76
+ }
77
+ }