Upload 29 files
Browse files- model_acts/.DS_Store +0 -0
- model_acts/tied_per_target_skip/.DS_Store +0 -0
- model_acts/tied_per_target_skip/layer_0.npz +3 -0
- model_acts/tied_per_target_skip/layer_1.npz +3 -0
- model_acts/tied_per_target_skip/layer_10.npz +3 -0
- model_acts/tied_per_target_skip/layer_11.npz +3 -0
- model_acts/tied_per_target_skip/layer_2.npz +3 -0
- model_acts/tied_per_target_skip/layer_3.npz +3 -0
- model_acts/tied_per_target_skip/layer_4.npz +3 -0
- model_acts/tied_per_target_skip/layer_5.npz +3 -0
- model_acts/tied_per_target_skip/layer_6.npz +3 -0
- model_acts/tied_per_target_skip/layer_7.npz +3 -0
- model_acts/tied_per_target_skip/layer_8.npz +3 -0
- model_acts/tied_per_target_skip/layer_9.npz +3 -0
- model_acts/tied_per_target_skip/summary.json +79 -0
- model_acts/untied_batchtopk/.DS_Store +0 -0
- model_acts/untied_batchtopk/layer_0.npz +3 -0
- model_acts/untied_batchtopk/layer_1.npz +3 -0
- model_acts/untied_batchtopk/layer_10.npz +3 -0
- model_acts/untied_batchtopk/layer_11.npz +3 -0
- model_acts/untied_batchtopk/layer_2.npz +3 -0
- model_acts/untied_batchtopk/layer_3.npz +3 -0
- model_acts/untied_batchtopk/layer_4.npz +3 -0
- model_acts/untied_batchtopk/layer_5.npz +3 -0
- model_acts/untied_batchtopk/layer_6.npz +3 -0
- model_acts/untied_batchtopk/layer_7.npz +3 -0
- model_acts/untied_batchtopk/layer_8.npz +3 -0
- model_acts/untied_batchtopk/layer_9.npz +3 -0
- model_acts/untied_batchtopk/summary.json +79 -0
model_acts/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
model_acts/tied_per_target_skip/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
model_acts/tied_per_target_skip/layer_0.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d3353a378c70bff4038350ca71a4c22ce889e2a95b8eb766d4955556a8b45e2
|
3 |
+
size 494337217
|
model_acts/tied_per_target_skip/layer_1.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1245d6596f7b7960be2e76b93763ca1fe5afbd99dbef625079f83680490a8d7
|
3 |
+
size 489508799
|
model_acts/tied_per_target_skip/layer_10.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a4156899cce8b78fcb0737f2a7ee40c8f8401e531b3e996f54421d0999119f8
|
3 |
+
size 489802191
|
model_acts/tied_per_target_skip/layer_11.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77b1202667b27619eb416881f74f5a39385e6eba0162ed2480a6b770018e318f
|
3 |
+
size 504547759
|
model_acts/tied_per_target_skip/layer_2.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfcf5fba291b0ec9255646f605e6a9735e4eb4a3a415326d74d2a2987e6549be
|
3 |
+
size 485271365
|
model_acts/tied_per_target_skip/layer_3.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba2f8ceee17e3a50882338106742a000244b803c0788e0596bc0fe159f0c4e5b
|
3 |
+
size 475999817
|
model_acts/tied_per_target_skip/layer_4.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b63d5dae34c085ece13d453b5feec845e3c882ce733b01d5387bab3bff1cf348
|
3 |
+
size 470775020
|
model_acts/tied_per_target_skip/layer_5.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:520a129c598423f412df36c060c58663d394ff3f05f672ca860ee779f4f507f2
|
3 |
+
size 468067069
|
model_acts/tied_per_target_skip/layer_6.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6badc8c86c62504583a2825cb344597ba678d97401d4553e17486b4bd07fad65
|
3 |
+
size 468245655
|
model_acts/tied_per_target_skip/layer_7.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb313a78d6b8b4ed3d54e9ed1ae03565959fc5750ec86dc12e73e7d0b1261c98
|
3 |
+
size 470199246
|
model_acts/tied_per_target_skip/layer_8.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fbb1dfae32e48f7b09c4249bd109c9a14e2d52f14b8a1cd146c829af572b8cd
|
3 |
+
size 473898103
|
model_acts/tied_per_target_skip/layer_9.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8919692d98e664eb3640ef781ff97db803849126f55fbc0908641a1ffedde740
|
3 |
+
size 480251933
|
model_acts/tied_per_target_skip/summary.json
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_path": "trained_gpt2_clts/tied-per-target-skip/tied_per_target_skip_global_batchtopk_jumprelu",
|
3 |
+
"config": {
|
4 |
+
"num_features": 122880,
|
5 |
+
"num_layers": 12,
|
6 |
+
"d_model": 768,
|
7 |
+
"model_name": null,
|
8 |
+
"normalization_method": "mean_std",
|
9 |
+
"activation_fn": "jumprelu",
|
10 |
+
"jumprelu_threshold": 0.0,
|
11 |
+
"batchtopk_k": null,
|
12 |
+
"batchtopk_straight_through": false,
|
13 |
+
"topk_k": null,
|
14 |
+
"topk_straight_through": true,
|
15 |
+
"topk_mode": "global",
|
16 |
+
"two_stage_batchtopk": false,
|
17 |
+
"two_stage_topk": false,
|
18 |
+
"clt_dtype": null,
|
19 |
+
"expected_input_dtype": null,
|
20 |
+
"mlp_input_template": null,
|
21 |
+
"mlp_output_template": null,
|
22 |
+
"tl_input_template": null,
|
23 |
+
"tl_output_template": null,
|
24 |
+
"decoder_tying": "per_target",
|
25 |
+
"enable_feature_offset": false,
|
26 |
+
"enable_feature_scale": false,
|
27 |
+
"skip_connection": true
|
28 |
+
},
|
29 |
+
"layer_stats": {
|
30 |
+
"0": {
|
31 |
+
"avg_l0": 4.6845703125,
|
32 |
+
"max_l0": 45.0
|
33 |
+
},
|
34 |
+
"1": {
|
35 |
+
"avg_l0": 3.1630859375,
|
36 |
+
"max_l0": 56.0
|
37 |
+
},
|
38 |
+
"2": {
|
39 |
+
"avg_l0": 6.1865234375,
|
40 |
+
"max_l0": 232.0
|
41 |
+
},
|
42 |
+
"3": {
|
43 |
+
"avg_l0": 4.947265625,
|
44 |
+
"max_l0": 26.0
|
45 |
+
},
|
46 |
+
"4": {
|
47 |
+
"avg_l0": 5.6328125,
|
48 |
+
"max_l0": 83.0
|
49 |
+
},
|
50 |
+
"5": {
|
51 |
+
"avg_l0": 4.9423828125,
|
52 |
+
"max_l0": 189.0
|
53 |
+
},
|
54 |
+
"6": {
|
55 |
+
"avg_l0": 8.0615234375,
|
56 |
+
"max_l0": 240.0
|
57 |
+
},
|
58 |
+
"7": {
|
59 |
+
"avg_l0": 9.8701171875,
|
60 |
+
"max_l0": 210.0
|
61 |
+
},
|
62 |
+
"8": {
|
63 |
+
"avg_l0": 12.326171875,
|
64 |
+
"max_l0": 332.0
|
65 |
+
},
|
66 |
+
"9": {
|
67 |
+
"avg_l0": 21.96875,
|
68 |
+
"max_l0": 624.0
|
69 |
+
},
|
70 |
+
"10": {
|
71 |
+
"avg_l0": 32.90234375,
|
72 |
+
"max_l0": 306.0
|
73 |
+
},
|
74 |
+
"11": {
|
75 |
+
"avg_l0": 44.5126953125,
|
76 |
+
"max_l0": 521.0
|
77 |
+
}
|
78 |
+
}
|
79 |
+
}
|
model_acts/untied_batchtopk/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
model_acts/untied_batchtopk/layer_0.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83389d695c2dd9d07d56ccb1cf2656b99c680c5604cd3f6f6e2079fa0d6085ee
|
3 |
+
size 137572646
|
model_acts/untied_batchtopk/layer_1.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77b138646d0718814a4c0d0e639053ff8a6a666142a35b823aaf4392f80768ce
|
3 |
+
size 138485586
|
model_acts/untied_batchtopk/layer_10.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c29582939fdb05468684438f909154c0d19da08d1a549a5878753dcabcfe7b7e
|
3 |
+
size 141088709
|
model_acts/untied_batchtopk/layer_11.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76f2c85ca2c72e0e2194c69aba50621b2b60e013e23cb05a95a8ef19cf6c42af
|
3 |
+
size 141985132
|
model_acts/untied_batchtopk/layer_2.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adb5cde2c96825bde7926fe423605fd4eeb52060ea2e05e3855a5647d10b0dd0
|
3 |
+
size 141012590
|
model_acts/untied_batchtopk/layer_3.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60b3f8b07c9a6f03e9d2fb058be3c84fa87a3907ef7aa4dc5a2523ba62ba164d
|
3 |
+
size 139652495
|
model_acts/untied_batchtopk/layer_4.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f17cb00904be3442f3b74400efa26028db87f4686b27e946c919a57a7985160a
|
3 |
+
size 139469523
|
model_acts/untied_batchtopk/layer_5.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5e683c0acdd92f0ce536b9edb1f7c41a856f04c582b034a2fbbaa3f241d17c0
|
3 |
+
size 139326223
|
model_acts/untied_batchtopk/layer_6.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eff45531db388be36092bc8920527de785600ddb09bca1b3c03a6bbe45b8ea99
|
3 |
+
size 139405298
|
model_acts/untied_batchtopk/layer_7.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0c0ccde584199f0b5418513fb1979fb65941f85383c9404b0958f8938523877
|
3 |
+
size 139476702
|
model_acts/untied_batchtopk/layer_8.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be904e1b04517ac30ab45ced760c2ce0463b4c45275ce0beac179e8b08fcea4c
|
3 |
+
size 139567834
|
model_acts/untied_batchtopk/layer_9.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:457a914fa4690cbf4ba2741c5900f2b88bc813cf9c4e857617444e959ff77fce
|
3 |
+
size 139921003
|
model_acts/untied_batchtopk/summary.json
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_path": "trained_gpt2_clts/untied-batchtopk/untied_global_batchtopk_jumprelu",
|
3 |
+
"config": {
|
4 |
+
"num_features": 32768,
|
5 |
+
"num_layers": 12,
|
6 |
+
"d_model": 768,
|
7 |
+
"model_name": null,
|
8 |
+
"normalization_method": "mean_std",
|
9 |
+
"activation_fn": "jumprelu",
|
10 |
+
"jumprelu_threshold": 0.0,
|
11 |
+
"batchtopk_k": null,
|
12 |
+
"batchtopk_straight_through": false,
|
13 |
+
"topk_k": null,
|
14 |
+
"topk_straight_through": true,
|
15 |
+
"topk_mode": "global",
|
16 |
+
"two_stage_batchtopk": false,
|
17 |
+
"two_stage_topk": false,
|
18 |
+
"clt_dtype": null,
|
19 |
+
"expected_input_dtype": null,
|
20 |
+
"mlp_input_template": null,
|
21 |
+
"mlp_output_template": null,
|
22 |
+
"tl_input_template": null,
|
23 |
+
"tl_output_template": null,
|
24 |
+
"decoder_tying": "none",
|
25 |
+
"enable_feature_offset": false,
|
26 |
+
"enable_feature_scale": false,
|
27 |
+
"skip_connection": false
|
28 |
+
},
|
29 |
+
"layer_stats": {
|
30 |
+
"0": {
|
31 |
+
"avg_l0": 6.1376953125,
|
32 |
+
"max_l0": 36.0
|
33 |
+
},
|
34 |
+
"1": {
|
35 |
+
"avg_l0": 3.8759765625,
|
36 |
+
"max_l0": 51.0
|
37 |
+
},
|
38 |
+
"2": {
|
39 |
+
"avg_l0": 6.123046875,
|
40 |
+
"max_l0": 116.0
|
41 |
+
},
|
42 |
+
"3": {
|
43 |
+
"avg_l0": 5.4716796875,
|
44 |
+
"max_l0": 36.0
|
45 |
+
},
|
46 |
+
"4": {
|
47 |
+
"avg_l0": 7.1689453125,
|
48 |
+
"max_l0": 31.0
|
49 |
+
},
|
50 |
+
"5": {
|
51 |
+
"avg_l0": 8.97265625,
|
52 |
+
"max_l0": 52.0
|
53 |
+
},
|
54 |
+
"6": {
|
55 |
+
"avg_l0": 14.6796875,
|
56 |
+
"max_l0": 147.0
|
57 |
+
},
|
58 |
+
"7": {
|
59 |
+
"avg_l0": 18.650390625,
|
60 |
+
"max_l0": 116.0
|
61 |
+
},
|
62 |
+
"8": {
|
63 |
+
"avg_l0": 24.76171875,
|
64 |
+
"max_l0": 130.0
|
65 |
+
},
|
66 |
+
"9": {
|
67 |
+
"avg_l0": 23.3310546875,
|
68 |
+
"max_l0": 81.0
|
69 |
+
},
|
70 |
+
"10": {
|
71 |
+
"avg_l0": 28.69140625,
|
72 |
+
"max_l0": 130.0
|
73 |
+
},
|
74 |
+
"11": {
|
75 |
+
"avg_l0": 40.8994140625,
|
76 |
+
"max_l0": 330.0
|
77 |
+
}
|
78 |
+
}
|
79 |
+
}
|