fnlp
/

Hzfinfdu commited on
Commit
d6a0620
·
verified ·
1 Parent(s): 2d9af8d

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. L1A/config.json +31 -0
  2. L1A/final.pth +3 -0
  3. L1A/metrics.json +77 -0
L1A/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 256,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 64,
5
+ "n_ov_heads": 32768,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 1024,
11
+ "layer": 1,
12
+ "model_name": "meta-llama/Llama-3.1-8B",
13
+ "mode": "top_k",
14
+ "top_k": 128,
15
+ "avg_norm": {
16
+ "in": 14.419232368469238,
17
+ "out": 0.679307222366333
18
+ },
19
+ "d_model": 4096,
20
+ "attn_scale": 11.313708498984761,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 256,
24
+ "rotary_base": 500000.0,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": true,
27
+ "NTK_by_parts_low_freq_factor": 1.0,
28
+ "NTK_by_parts_high_freq_factor": 4.0,
29
+ "NTK_by_parts_factor": 8.0,
30
+ "old_context_len": 8192
31
+ }
L1A/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33496f2ad08ed2b8a9bc5b2b60519c707ad1f6d277d66bea67ffdbbb1456f851
3
+ size 1614040466
L1A/metrics.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 6.524e-06,
3
+ "explained_variance": 0.92435,
4
+ "l1": 3.83,
5
+ "ground_truth_norm": 0.676,
6
+ "reconstructed_norm": 0.656,
7
+ "error_norm": 0.1569,
8
+ "sparsity/below 1e-5": 17597,
9
+ "sparsity/below 1e-6": 17555,
10
+ "positivity": 0.46515,
11
+ "ov_head_live_count": {
12
+ "0": 325,
13
+ "1": 127,
14
+ "2": 363,
15
+ "3": 263,
16
+ "4": 216,
17
+ "5": 122,
18
+ "6": 378,
19
+ "7": 361,
20
+ "8": 127,
21
+ "9": 208,
22
+ "10": 322,
23
+ "11": 184,
24
+ "12": 258,
25
+ "13": 191,
26
+ "14": 163,
27
+ "15": 266,
28
+ "16": 179,
29
+ "17": 24,
30
+ "18": 257,
31
+ "19": 343,
32
+ "20": 308,
33
+ "21": 419,
34
+ "22": 131,
35
+ "23": 278,
36
+ "24": 331,
37
+ "25": 252,
38
+ "26": 162,
39
+ "27": 241,
40
+ "28": 67,
41
+ "29": 306,
42
+ "30": 199,
43
+ "31": 193,
44
+ "32": 225,
45
+ "33": 353,
46
+ "34": 95,
47
+ "35": 204,
48
+ "36": 29,
49
+ "37": 318,
50
+ "38": 305,
51
+ "39": 273,
52
+ "40": 348,
53
+ "41": 126,
54
+ "42": 26,
55
+ "43": 335,
56
+ "44": 269,
57
+ "45": 190,
58
+ "46": 292,
59
+ "47": 368,
60
+ "48": 186,
61
+ "49": 335,
62
+ "50": 368,
63
+ "51": 150,
64
+ "52": 388,
65
+ "53": 48,
66
+ "54": 333,
67
+ "55": 289,
68
+ "56": 65,
69
+ "57": 39,
70
+ "58": 367,
71
+ "59": 265,
72
+ "60": 246,
73
+ "61": 418,
74
+ "62": 275,
75
+ "63": 150
76
+ }
77
+ }