ctigges committed (verified)
Commit 25c9670
Parent(s): 463f6cf

Upload 29 files
model_acts/.DS_Store ADDED
Binary file (6.15 kB)
 
model_acts/tied_per_target_skip/.DS_Store ADDED
Binary file (6.15 kB)
 
model_acts/tied_per_target_skip/layer_0.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4d3353a378c70bff4038350ca71a4c22ce889e2a95b8eb766d4955556a8b45e2
+ size 494337217
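
Each `.npz` entry in this commit is stored as a Git LFS pointer: the actual array data lives in LFS storage, and the pointer records only the spec version, a `sha256` object id, and the byte size. A minimal sketch of how one might verify a downloaded file against its pointer, using only the Python standard library (the local path is hypothetical; oid and size are taken from the `layer_0.npz` pointer above):

```python
import hashlib
import os

def verify_lfs_object(local_path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the sha256 oid and size from its LFS pointer."""
    if os.path.getsize(local_path) != expected_size:
        return False
    h = hashlib.sha256()
    with open(local_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            h.update(chunk)
    return h.hexdigest() == expected_oid

# Values from the layer_0.npz pointer above.
print(verify_lfs_object(
    "model_acts/tied_per_target_skip/layer_0.npz",
    "4d3353a378c70bff4038350ca71a4c22ce889e2a95b8eb766d4955556a8b45e2",
    494337217,
))
```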
model_acts/tied_per_target_skip/layer_1.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b1245d6596f7b7960be2e76b93763ca1fe5afbd99dbef625079f83680490a8d7
+ size 489508799
model_acts/tied_per_target_skip/layer_10.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a4156899cce8b78fcb0737f2a7ee40c8f8401e531b3e996f54421d0999119f8
+ size 489802191
model_acts/tied_per_target_skip/layer_11.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77b1202667b27619eb416881f74f5a39385e6eba0162ed2480a6b770018e318f
+ size 504547759
model_acts/tied_per_target_skip/layer_2.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfcf5fba291b0ec9255646f605e6a9735e4eb4a3a415326d74d2a2987e6549be
+ size 485271365
model_acts/tied_per_target_skip/layer_3.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba2f8ceee17e3a50882338106742a000244b803c0788e0596bc0fe159f0c4e5b
+ size 475999817
model_acts/tied_per_target_skip/layer_4.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b63d5dae34c085ece13d453b5feec845e3c882ce733b01d5387bab3bff1cf348
+ size 470775020
model_acts/tied_per_target_skip/layer_5.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:520a129c598423f412df36c060c58663d394ff3f05f672ca860ee779f4f507f2
+ size 468067069
model_acts/tied_per_target_skip/layer_6.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6badc8c86c62504583a2825cb344597ba678d97401d4553e17486b4bd07fad65
+ size 468245655
model_acts/tied_per_target_skip/layer_7.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb313a78d6b8b4ed3d54e9ed1ae03565959fc5750ec86dc12e73e7d0b1261c98
+ size 470199246
model_acts/tied_per_target_skip/layer_8.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5fbb1dfae32e48f7b09c4249bd109c9a14e2d52f14b8a1cd146c829af572b8cd
+ size 473898103
model_acts/tied_per_target_skip/layer_9.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8919692d98e664eb3640ef781ff97db803849126f55fbc0908641a1ffedde740
+ size 480251933
model_acts/tied_per_target_skip/summary.json ADDED
@@ -0,0 +1,79 @@
+ {
+   "model_path": "trained_gpt2_clts/tied-per-target-skip/tied_per_target_skip_global_batchtopk_jumprelu",
+   "config": {
+     "num_features": 122880,
+     "num_layers": 12,
+     "d_model": 768,
+     "model_name": null,
+     "normalization_method": "mean_std",
+     "activation_fn": "jumprelu",
+     "jumprelu_threshold": 0.0,
+     "batchtopk_k": null,
+     "batchtopk_straight_through": false,
+     "topk_k": null,
+     "topk_straight_through": true,
+     "topk_mode": "global",
+     "two_stage_batchtopk": false,
+     "two_stage_topk": false,
+     "clt_dtype": null,
+     "expected_input_dtype": null,
+     "mlp_input_template": null,
+     "mlp_output_template": null,
+     "tl_input_template": null,
+     "tl_output_template": null,
+     "decoder_tying": "per_target",
+     "enable_feature_offset": false,
+     "enable_feature_scale": false,
+     "skip_connection": true
+   },
+   "layer_stats": {
+     "0": {
+       "avg_l0": 4.6845703125,
+       "max_l0": 45.0
+     },
+     "1": {
+       "avg_l0": 3.1630859375,
+       "max_l0": 56.0
+     },
+     "2": {
+       "avg_l0": 6.1865234375,
+       "max_l0": 232.0
+     },
+     "3": {
+       "avg_l0": 4.947265625,
+       "max_l0": 26.0
+     },
+     "4": {
+       "avg_l0": 5.6328125,
+       "max_l0": 83.0
+     },
+     "5": {
+       "avg_l0": 4.9423828125,
+       "max_l0": 189.0
+     },
+     "6": {
+       "avg_l0": 8.0615234375,
+       "max_l0": 240.0
+     },
+     "7": {
+       "avg_l0": 9.8701171875,
+       "max_l0": 210.0
+     },
+     "8": {
+       "avg_l0": 12.326171875,
+       "max_l0": 332.0
+     },
+     "9": {
+       "avg_l0": 21.96875,
+       "max_l0": 624.0
+     },
+     "10": {
+       "avg_l0": 32.90234375,
+       "max_l0": 306.0
+     },
+     "11": {
+       "avg_l0": 44.5126953125,
+       "max_l0": 521.0
+     }
+   }
+ }
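
The summary.json above records the CLT configuration and per-layer sparsity statistics that accompany the per-layer activation dumps. A minimal sketch of how one might inspect these files after downloading the repository; the paths are as committed, the JSON keys are those shown in the diff, but the array names inside each `.npz` archive are not visible in this commit, so the sketch only lists them:

```python
import json
import numpy as np

# Per-layer sparsity stats recorded in the summary (keys as shown in the diff above).
with open("model_acts/tied_per_target_skip/summary.json") as f:
    summary = json.load(f)
for layer, stats in summary["layer_stats"].items():
    print(f"layer {layer}: avg_l0={stats['avg_l0']:.2f}, max_l0={stats['max_l0']:.0f}")

# The .npz array names are not documented here, so just inspect what each archive stores.
acts = np.load("model_acts/tied_per_target_skip/layer_0.npz")
print(acts.files)  # names of the arrays contained in the archive
```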
model_acts/untied_batchtopk/.DS_Store ADDED
Binary file (6.15 kB)
 
model_acts/untied_batchtopk/layer_0.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83389d695c2dd9d07d56ccb1cf2656b99c680c5604cd3f6f6e2079fa0d6085ee
+ size 137572646
model_acts/untied_batchtopk/layer_1.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77b138646d0718814a4c0d0e639053ff8a6a666142a35b823aaf4392f80768ce
+ size 138485586
model_acts/untied_batchtopk/layer_10.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c29582939fdb05468684438f909154c0d19da08d1a549a5878753dcabcfe7b7e
+ size 141088709
model_acts/untied_batchtopk/layer_11.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:76f2c85ca2c72e0e2194c69aba50621b2b60e013e23cb05a95a8ef19cf6c42af
+ size 141985132
model_acts/untied_batchtopk/layer_2.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:adb5cde2c96825bde7926fe423605fd4eeb52060ea2e05e3855a5647d10b0dd0
+ size 141012590
model_acts/untied_batchtopk/layer_3.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:60b3f8b07c9a6f03e9d2fb058be3c84fa87a3907ef7aa4dc5a2523ba62ba164d
+ size 139652495
model_acts/untied_batchtopk/layer_4.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f17cb00904be3442f3b74400efa26028db87f4686b27e946c919a57a7985160a
+ size 139469523
model_acts/untied_batchtopk/layer_5.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b5e683c0acdd92f0ce536b9edb1f7c41a856f04c582b034a2fbbaa3f241d17c0
+ size 139326223
model_acts/untied_batchtopk/layer_6.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eff45531db388be36092bc8920527de785600ddb09bca1b3c03a6bbe45b8ea99
+ size 139405298
model_acts/untied_batchtopk/layer_7.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0c0ccde584199f0b5418513fb1979fb65941f85383c9404b0958f8938523877
+ size 139476702
model_acts/untied_batchtopk/layer_8.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be904e1b04517ac30ab45ced760c2ce0463b4c45275ce0beac179e8b08fcea4c
+ size 139567834
model_acts/untied_batchtopk/layer_9.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:457a914fa4690cbf4ba2741c5900f2b88bc813cf9c4e857617444e959ff77fce
+ size 139921003
model_acts/untied_batchtopk/summary.json ADDED
@@ -0,0 +1,79 @@
+ {
+   "model_path": "trained_gpt2_clts/untied-batchtopk/untied_global_batchtopk_jumprelu",
+   "config": {
+     "num_features": 32768,
+     "num_layers": 12,
+     "d_model": 768,
+     "model_name": null,
+     "normalization_method": "mean_std",
+     "activation_fn": "jumprelu",
+     "jumprelu_threshold": 0.0,
+     "batchtopk_k": null,
+     "batchtopk_straight_through": false,
+     "topk_k": null,
+     "topk_straight_through": true,
+     "topk_mode": "global",
+     "two_stage_batchtopk": false,
+     "two_stage_topk": false,
+     "clt_dtype": null,
+     "expected_input_dtype": null,
+     "mlp_input_template": null,
+     "mlp_output_template": null,
+     "tl_input_template": null,
+     "tl_output_template": null,
+     "decoder_tying": "none",
+     "enable_feature_offset": false,
+     "enable_feature_scale": false,
+     "skip_connection": false
+   },
+   "layer_stats": {
+     "0": {
+       "avg_l0": 6.1376953125,
+       "max_l0": 36.0
+     },
+     "1": {
+       "avg_l0": 3.8759765625,
+       "max_l0": 51.0
+     },
+     "2": {
+       "avg_l0": 6.123046875,
+       "max_l0": 116.0
+     },
+     "3": {
+       "avg_l0": 5.4716796875,
+       "max_l0": 36.0
+     },
+     "4": {
+       "avg_l0": 7.1689453125,
+       "max_l0": 31.0
+     },
+     "5": {
+       "avg_l0": 8.97265625,
+       "max_l0": 52.0
+     },
+     "6": {
+       "avg_l0": 14.6796875,
+       "max_l0": 147.0
+     },
+     "7": {
+       "avg_l0": 18.650390625,
+       "max_l0": 116.0
+     },
+     "8": {
+       "avg_l0": 24.76171875,
+       "max_l0": 130.0
+     },
+     "9": {
+       "avg_l0": 23.3310546875,
+       "max_l0": 81.0
+     },
+     "10": {
+       "avg_l0": 28.69140625,
+       "max_l0": 130.0
+     },
+     "11": {
+       "avg_l0": 40.8994140625,
+       "max_l0": 330.0
+     }
+   }
+ }
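
With both summaries in the commit, one might compare per-layer sparsity between the two variants (tied per-target skip, 122880 features, vs. untied batchtopk, 32768 features). A small sketch under the assumption that the repository has been downloaded with the paths as committed; the `layer_stats` and `avg_l0` keys are those shown in the diffs above:

```python
import json

def load_avg_l0(path: str) -> dict:
    """Map layer index -> avg_l0 from a summary.json as committed above."""
    with open(path) as f:
        return {int(k): v["avg_l0"] for k, v in json.load(f)["layer_stats"].items()}

tied = load_avg_l0("model_acts/tied_per_target_skip/summary.json")
untied = load_avg_l0("model_acts/untied_batchtopk/summary.json")
for layer in sorted(tied):
    print(f"layer {layer:2d}: tied_per_target_skip={tied[layer]:6.2f}  "
          f"untied_batchtopk={untied[layer]:6.2f}")
```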