fnlp
/

Hzfinfdu commited on
Commit
8ed461c
·
verified ·
1 Parent(s): 4da44ca

Upload folder using huggingface_hub

Browse files
L0/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 0,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 23.102449417114258,
17
+ "out": 3.8546135425567627
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L0/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2851325a82fcf4797fb18ac0d279d05ca10b619d5920a9edb2f4dc98a3355db
3
+ size 57006482
L0/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.00106,
3
+ "explained_variance": 0.93259,
4
+ "l1": 15.9,
5
+ "ground_truth_norm": 3.862,
6
+ "reconstructed_norm": 3.764,
7
+ "error_norm": 0.801,
8
+ "sparsity/below 1e-5": 3331,
9
+ "sparsity/below 1e-6": 3316,
10
+ "positivity": 1.0
11
+ }
L1/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 1,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 27.244165420532227,
17
+ "out": 3.7663726806640625
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L1/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcd30b078958cf045da8d155e337a953dd97ed6ee760b86928dec33d623bd1b6
3
+ size 57006482
L1/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.0011,
3
+ "explained_variance": 0.92292,
4
+ "l1": 16.14,
5
+ "ground_truth_norm": 3.762,
6
+ "reconstructed_norm": 3.65,
7
+ "error_norm": 0.8645,
8
+ "sparsity/below 1e-5": 2975,
9
+ "sparsity/below 1e-6": 2961,
10
+ "positivity": 1.0
11
+ }
L10/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 10,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 144.09291076660156,
17
+ "out": 3.9614734649658203
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L10/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54b1c9c7007f81d230ba61c52810ec8b3232dc5fe33a7e2171d1297edf82342a
3
+ size 57006482
L10/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.00277,
3
+ "explained_variance": 0.8377,
4
+ "l1": 16.73,
5
+ "ground_truth_norm": 3.964,
6
+ "reconstructed_norm": 3.734,
7
+ "error_norm": 1.2477,
8
+ "sparsity/below 1e-5": 3543,
9
+ "sparsity/below 1e-6": 3510,
10
+ "positivity": 1.0
11
+ }
L11/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 11,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 150.25917053222656,
17
+ "out": 20.201923370361328
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L11/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:086852435fe7be016564297809b2083e7e2e036641fcf2dc4e7926e6aaa7b19c
3
+ size 57006482
L11/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.00404,
3
+ "explained_variance": 0.77293,
4
+ "l1": 18.28,
5
+ "ground_truth_norm": 20.238,
6
+ "reconstructed_norm": 20.157,
7
+ "error_norm": 1.5937,
8
+ "sparsity/below 1e-5": 4465,
9
+ "sparsity/below 1e-6": 4406,
10
+ "positivity": 1.0
11
+ }
L2/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 2,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 27.967687606811523,
17
+ "out": 4.815014839172363
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L2/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c93793b9010c647284edb251923afc56ffae582effc72edf1ea0e1c2334d985
3
+ size 57006482
L2/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.00293,
3
+ "explained_variance": 0.88528,
4
+ "l1": 21.08,
5
+ "ground_truth_norm": 4.764,
6
+ "reconstructed_norm": 4.533,
7
+ "error_norm": 1.4217,
8
+ "sparsity/below 1e-5": 2958,
9
+ "sparsity/below 1e-6": 2950,
10
+ "positivity": 1.0
11
+ }
L3/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 3,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 29.04673957824707,
17
+ "out": 7.051708698272705
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L3/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69691f0beacbf8f0f7b58e245d20f60f764cc445fc418510ae3d1c176a5667e9
3
+ size 57006482
L3/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.00703,
3
+ "explained_variance": 0.883,
4
+ "l1": 39.99,
5
+ "ground_truth_norm": 7.057,
6
+ "reconstructed_norm": 6.667,
7
+ "error_norm": 2.2736,
8
+ "sparsity/below 1e-5": 4204,
9
+ "sparsity/below 1e-6": 4204,
10
+ "positivity": 1.0
11
+ }
L4/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 4,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 39.33174514770508,
17
+ "out": 5.237796306610107
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L4/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67a2bb6b113eb111dfb759d38f61fdc4bbf6e9e6e99c661608dc6d3fa46aa1b2
3
+ size 57006482
L4/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.00345,
3
+ "explained_variance": 0.88722,
4
+ "l1": 23.17,
5
+ "ground_truth_norm": 5.202,
6
+ "reconstructed_norm": 4.961,
7
+ "error_norm": 1.5414,
8
+ "sparsity/below 1e-5": 3891,
9
+ "sparsity/below 1e-6": 3887,
10
+ "positivity": 1.0
11
+ }
L5/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 5,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 40.27893829345703,
17
+ "out": 5.156437873840332
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L5/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d430374109057795a15794d3e5d72fe6eb41cb79687e3d70e7703b4e000120de
3
+ size 57006482
L5/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.0039,
3
+ "explained_variance": 0.88818,
4
+ "l1": 24.1,
5
+ "ground_truth_norm": 5.135,
6
+ "reconstructed_norm": 4.869,
7
+ "error_norm": 1.5905,
8
+ "sparsity/below 1e-5": 3800,
9
+ "sparsity/below 1e-6": 3796,
10
+ "positivity": 1.0
11
+ }
L6/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 6,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 41.99013900756836,
17
+ "out": 4.518642902374268
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L6/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:608cd82d0fc3177f79ff8d2afcd49ccdd5eb14bb1c3cc733013a4f3f37deec5a
3
+ size 57006482
L6/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.00344,
3
+ "explained_variance": 0.85124,
4
+ "l1": 19.99,
5
+ "ground_truth_norm": 4.526,
6
+ "reconstructed_norm": 4.249,
7
+ "error_norm": 1.5198,
8
+ "sparsity/below 1e-5": 3257,
9
+ "sparsity/below 1e-6": 3251,
10
+ "positivity": 1.0
11
+ }
L7/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 7,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 57.60716247558594,
17
+ "out": 5.574440956115723
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L7/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:061d14ad379b786671e815ebe16c3964cfcac5c0b956bcbb0e05c11f0145a443
3
+ size 57006482
L7/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.00393,
3
+ "explained_variance": 0.87354,
4
+ "l1": 26.84,
5
+ "ground_truth_norm": 5.58,
6
+ "reconstructed_norm": 5.312,
7
+ "error_norm": 1.6651,
8
+ "sparsity/below 1e-5": 4378,
9
+ "sparsity/below 1e-6": 4367,
10
+ "positivity": 1.0
11
+ }
L8/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 8,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 86.12666320800781,
17
+ "out": 5.070123672485352
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L8/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5db36f3ba8ff080181ec1f368351c9e13021056ecce6b602347fa2c8088a2487
3
+ size 57006482
L8/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.0032,
3
+ "explained_variance": 0.88431,
4
+ "l1": 22.02,
5
+ "ground_truth_norm": 5.037,
6
+ "reconstructed_norm": 4.824,
7
+ "error_norm": 1.3827,
8
+ "sparsity/below 1e-5": 3809,
9
+ "sparsity/below 1e-6": 3727,
10
+ "positivity": 1.0
11
+ }
L9/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "d_qk_head": 128,
3
+ "d_ov_head": 1,
4
+ "n_qk_heads": 24,
5
+ "n_ov_heads": 6144,
6
+ "device": "cuda",
7
+ "dtype": "torch.float",
8
+ "virtual_kv_num": 0,
9
+ "use_z_relu": true,
10
+ "n_ctx": 256,
11
+ "layer": 9,
12
+ "model_name": "EleutherAI/pythia-160m",
13
+ "mode": "top_k",
14
+ "top_k": 64,
15
+ "avg_norm": {
16
+ "in": 117.01543426513672,
17
+ "out": 4.0516133308410645
18
+ },
19
+ "d_model": 768,
20
+ "attn_scale": 8.0,
21
+ "positional_embedding_type": "rotary",
22
+ "rotary_scale": 1,
23
+ "rotary_dim": 128,
24
+ "rotary_base": 10000,
25
+ "rotary_adjacent_pairs": false,
26
+ "use_NTK_by_parts_rope": false,
27
+ "NTK_by_parts_low_freq_factor": null,
28
+ "NTK_by_parts_high_freq_factor": null,
29
+ "NTK_by_parts_factor": null,
30
+ "old_context_len": null
31
+ }
L9/final.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff7c8c57c2a245adec188f711e33c393da6f786e92908155b91ec059d65dec62
3
+ size 57006482
L9/metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mse_loss": 0.00258,
3
+ "explained_variance": 0.87377,
4
+ "l1": 18.61,
5
+ "ground_truth_norm": 4.069,
6
+ "reconstructed_norm": 3.853,
7
+ "error_norm": 1.223,
8
+ "sparsity/below 1e-5": 3197,
9
+ "sparsity/below 1e-6": 3019,
10
+ "positivity": 1.0
11
+ }