Upload folder using huggingface_hub
Browse files- L1A/config.json +31 -0
- L1A/final.pth +3 -0
- L1A/metrics.json +77 -0
L1A/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"d_qk_head": 256,
|
3 |
+
"d_ov_head": 1,
|
4 |
+
"n_qk_heads": 64,
|
5 |
+
"n_ov_heads": 32768,
|
6 |
+
"device": "cuda",
|
7 |
+
"dtype": "torch.float",
|
8 |
+
"virtual_kv_num": 0,
|
9 |
+
"use_z_relu": true,
|
10 |
+
"n_ctx": 1024,
|
11 |
+
"layer": 1,
|
12 |
+
"model_name": "meta-llama/Llama-3.1-8B",
|
13 |
+
"mode": "top_k",
|
14 |
+
"top_k": 128,
|
15 |
+
"avg_norm": {
|
16 |
+
"in": 14.419232368469238,
|
17 |
+
"out": 0.679307222366333
|
18 |
+
},
|
19 |
+
"d_model": 4096,
|
20 |
+
"attn_scale": 11.313708498984761,
|
21 |
+
"positional_embedding_type": "rotary",
|
22 |
+
"rotary_scale": 1,
|
23 |
+
"rotary_dim": 256,
|
24 |
+
"rotary_base": 500000.0,
|
25 |
+
"rotary_adjacent_pairs": false,
|
26 |
+
"use_NTK_by_parts_rope": true,
|
27 |
+
"NTK_by_parts_low_freq_factor": 1.0,
|
28 |
+
"NTK_by_parts_high_freq_factor": 4.0,
|
29 |
+
"NTK_by_parts_factor": 8.0,
|
30 |
+
"old_context_len": 8192
|
31 |
+
}
|
L1A/final.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33496f2ad08ed2b8a9bc5b2b60519c707ad1f6d277d66bea67ffdbbb1456f851
|
3 |
+
size 1614040466
|
L1A/metrics.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"mse_loss": 6.524e-06,
|
3 |
+
"explained_variance": 0.92435,
|
4 |
+
"l1": 3.83,
|
5 |
+
"ground_truth_norm": 0.676,
|
6 |
+
"reconstructed_norm": 0.656,
|
7 |
+
"error_norm": 0.1569,
|
8 |
+
"sparsity/below 1e-5": 17597,
|
9 |
+
"sparsity/below 1e-6": 17555,
|
10 |
+
"positivity": 0.46515,
|
11 |
+
"ov_head_live_count": {
|
12 |
+
"0": 325,
|
13 |
+
"1": 127,
|
14 |
+
"2": 363,
|
15 |
+
"3": 263,
|
16 |
+
"4": 216,
|
17 |
+
"5": 122,
|
18 |
+
"6": 378,
|
19 |
+
"7": 361,
|
20 |
+
"8": 127,
|
21 |
+
"9": 208,
|
22 |
+
"10": 322,
|
23 |
+
"11": 184,
|
24 |
+
"12": 258,
|
25 |
+
"13": 191,
|
26 |
+
"14": 163,
|
27 |
+
"15": 266,
|
28 |
+
"16": 179,
|
29 |
+
"17": 24,
|
30 |
+
"18": 257,
|
31 |
+
"19": 343,
|
32 |
+
"20": 308,
|
33 |
+
"21": 419,
|
34 |
+
"22": 131,
|
35 |
+
"23": 278,
|
36 |
+
"24": 331,
|
37 |
+
"25": 252,
|
38 |
+
"26": 162,
|
39 |
+
"27": 241,
|
40 |
+
"28": 67,
|
41 |
+
"29": 306,
|
42 |
+
"30": 199,
|
43 |
+
"31": 193,
|
44 |
+
"32": 225,
|
45 |
+
"33": 353,
|
46 |
+
"34": 95,
|
47 |
+
"35": 204,
|
48 |
+
"36": 29,
|
49 |
+
"37": 318,
|
50 |
+
"38": 305,
|
51 |
+
"39": 273,
|
52 |
+
"40": 348,
|
53 |
+
"41": 126,
|
54 |
+
"42": 26,
|
55 |
+
"43": 335,
|
56 |
+
"44": 269,
|
57 |
+
"45": 190,
|
58 |
+
"46": 292,
|
59 |
+
"47": 368,
|
60 |
+
"48": 186,
|
61 |
+
"49": 335,
|
62 |
+
"50": 368,
|
63 |
+
"51": 150,
|
64 |
+
"52": 388,
|
65 |
+
"53": 48,
|
66 |
+
"54": 333,
|
67 |
+
"55": 289,
|
68 |
+
"56": 65,
|
69 |
+
"57": 39,
|
70 |
+
"58": 367,
|
71 |
+
"59": 265,
|
72 |
+
"60": 246,
|
73 |
+
"61": 418,
|
74 |
+
"62": 275,
|
75 |
+
"63": 150
|
76 |
+
}
|
77 |
+
}
|