fnlp
/

Hzfinfdu committed on
Commit
ab93a00
·
verified ·
1 Parent(s): 8ed461c

Upload folder using huggingface_hub

Browse files
L0Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 0,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 23.102449417114258,
17
- "out": 3.8546135425567627
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L0Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2851325a82fcf4797fb18ac0d279d05ca10b619d5920a9edb2f4dc98a3355db
3
- size 57006482
 
 
 
 
L0Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.00106,
3
- "explained_variance": 0.93259,
4
- "l1": 15.9,
5
- "ground_truth_norm": 3.862,
6
- "reconstructed_norm": 3.764,
7
- "error_norm": 0.801,
8
- "sparsity/below 1e-5": 3331,
9
- "sparsity/below 1e-6": 3316,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L10Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 10,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 144.09291076660156,
17
- "out": 3.9614734649658203
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L10Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:54b1c9c7007f81d230ba61c52810ec8b3232dc5fe33a7e2171d1297edf82342a
3
- size 57006482
 
 
 
 
L10Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.00277,
3
- "explained_variance": 0.8377,
4
- "l1": 16.73,
5
- "ground_truth_norm": 3.964,
6
- "reconstructed_norm": 3.734,
7
- "error_norm": 1.2477,
8
- "sparsity/below 1e-5": 3543,
9
- "sparsity/below 1e-6": 3510,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L11Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 11,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 150.25917053222656,
17
- "out": 20.201923370361328
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L11Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:086852435fe7be016564297809b2083e7e2e036641fcf2dc4e7926e6aaa7b19c
3
- size 57006482
 
 
 
 
L11Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.00404,
3
- "explained_variance": 0.77293,
4
- "l1": 18.28,
5
- "ground_truth_norm": 20.238,
6
- "reconstructed_norm": 20.157,
7
- "error_norm": 1.5937,
8
- "sparsity/below 1e-5": 4465,
9
- "sparsity/below 1e-6": 4406,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L1Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 1,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 27.244165420532227,
17
- "out": 3.7663726806640625
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L1Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcd30b078958cf045da8d155e337a953dd97ed6ee760b86928dec33d623bd1b6
3
- size 57006482
 
 
 
 
L1Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.0011,
3
- "explained_variance": 0.92292,
4
- "l1": 16.14,
5
- "ground_truth_norm": 3.762,
6
- "reconstructed_norm": 3.65,
7
- "error_norm": 0.8645,
8
- "sparsity/below 1e-5": 2975,
9
- "sparsity/below 1e-6": 2961,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L2Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 2,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 27.967687606811523,
17
- "out": 4.815014839172363
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L2Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c93793b9010c647284edb251923afc56ffae582effc72edf1ea0e1c2334d985
3
- size 57006482
 
 
 
 
L2Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.00293,
3
- "explained_variance": 0.88528,
4
- "l1": 21.08,
5
- "ground_truth_norm": 4.764,
6
- "reconstructed_norm": 4.533,
7
- "error_norm": 1.4217,
8
- "sparsity/below 1e-5": 2958,
9
- "sparsity/below 1e-6": 2950,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L3Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 3,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 29.04673957824707,
17
- "out": 7.051708698272705
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L3Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:69691f0beacbf8f0f7b58e245d20f60f764cc445fc418510ae3d1c176a5667e9
3
- size 57006482
 
 
 
 
L3Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.00703,
3
- "explained_variance": 0.883,
4
- "l1": 39.99,
5
- "ground_truth_norm": 7.057,
6
- "reconstructed_norm": 6.667,
7
- "error_norm": 2.2736,
8
- "sparsity/below 1e-5": 4204,
9
- "sparsity/below 1e-6": 4204,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L4Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 4,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 39.33174514770508,
17
- "out": 5.237796306610107
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L4Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:67a2bb6b113eb111dfb759d38f61fdc4bbf6e9e6e99c661608dc6d3fa46aa1b2
3
- size 57006482
 
 
 
 
L4Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.00345,
3
- "explained_variance": 0.88722,
4
- "l1": 23.17,
5
- "ground_truth_norm": 5.202,
6
- "reconstructed_norm": 4.961,
7
- "error_norm": 1.5414,
8
- "sparsity/below 1e-5": 3891,
9
- "sparsity/below 1e-6": 3887,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L5Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 5,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 40.27893829345703,
17
- "out": 5.156437873840332
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L5Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d430374109057795a15794d3e5d72fe6eb41cb79687e3d70e7703b4e000120de
3
- size 57006482
 
 
 
 
L5Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.0039,
3
- "explained_variance": 0.88818,
4
- "l1": 24.1,
5
- "ground_truth_norm": 5.135,
6
- "reconstructed_norm": 4.869,
7
- "error_norm": 1.5905,
8
- "sparsity/below 1e-5": 3800,
9
- "sparsity/below 1e-6": 3796,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L6Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 6,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 41.99013900756836,
17
- "out": 4.518642902374268
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L6Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:608cd82d0fc3177f79ff8d2afcd49ccdd5eb14bb1c3cc733013a4f3f37deec5a
3
- size 57006482
 
 
 
 
L6Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.00344,
3
- "explained_variance": 0.85124,
4
- "l1": 19.99,
5
- "ground_truth_norm": 4.526,
6
- "reconstructed_norm": 4.249,
7
- "error_norm": 1.5198,
8
- "sparsity/below 1e-5": 3257,
9
- "sparsity/below 1e-6": 3251,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L7Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 7,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 57.60716247558594,
17
- "out": 5.574440956115723
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L7Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:061d14ad379b786671e815ebe16c3964cfcac5c0b956bcbb0e05c11f0145a443
3
- size 57006482
 
 
 
 
L7Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.00393,
3
- "explained_variance": 0.87354,
4
- "l1": 26.84,
5
- "ground_truth_norm": 5.58,
6
- "reconstructed_norm": 5.312,
7
- "error_norm": 1.6651,
8
- "sparsity/below 1e-5": 4378,
9
- "sparsity/below 1e-6": 4367,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L8Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 8,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 86.12666320800781,
17
- "out": 5.070123672485352
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L8Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5db36f3ba8ff080181ec1f368351c9e13021056ecce6b602347fa2c8088a2487
3
- size 57006482
 
 
 
 
L8Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.0032,
3
- "explained_variance": 0.88431,
4
- "l1": 22.02,
5
- "ground_truth_norm": 5.037,
6
- "reconstructed_norm": 4.824,
7
- "error_norm": 1.3827,
8
- "sparsity/below 1e-5": 3809,
9
- "sparsity/below 1e-6": 3727,
10
- "positivity": 1.0
11
- }
 
 
 
 
 
 
 
 
 
 
 
 
L9Lorsa/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "d_qk_head": 128,
3
- "d_ov_head": 1,
4
- "n_qk_heads": 24,
5
- "n_ov_heads": 6144,
6
- "device": "cuda",
7
- "dtype": "torch.float",
8
- "virtual_kv_num": 0,
9
- "use_z_relu": true,
10
- "n_ctx": 256,
11
- "layer": 9,
12
- "model_name": "EleutherAI/pythia-160m",
13
- "mode": "top_k",
14
- "top_k": 64,
15
- "avg_norm": {
16
- "in": 117.01543426513672,
17
- "out": 4.0516133308410645
18
- },
19
- "d_model": 768,
20
- "attn_scale": 8.0,
21
- "positional_embedding_type": "rotary",
22
- "rotary_scale": 1,
23
- "rotary_dim": 128,
24
- "rotary_base": 10000,
25
- "rotary_adjacent_pairs": false,
26
- "use_NTK_by_parts_rope": false,
27
- "NTK_by_parts_low_freq_factor": null,
28
- "NTK_by_parts_high_freq_factor": null,
29
- "NTK_by_parts_factor": null,
30
- "old_context_len": null
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
L9Lorsa/final.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff7c8c57c2a245adec188f711e33c393da6f786e92908155b91ec059d65dec62
3
- size 57006482
 
 
 
 
L9Lorsa/metrics.json DELETED
@@ -1,11 +0,0 @@
1
- {
2
- "mse_loss": 0.00258,
3
- "explained_variance": 0.87377,
4
- "l1": 18.61,
5
- "ground_truth_norm": 4.069,
6
- "reconstructed_norm": 3.853,
7
- "error_norm": 1.223,
8
- "sparsity/below 1e-5": 3197,
9
- "sparsity/below 1e-6": 3019,
10
- "positivity": 1.0
11
- }