sthyland committed on
Commit
7ae648e
·
verified ·
1 Parent(s): 5f5a160

upload SAEs and autointerp

Browse files
ef2/autointerp_layer15_res_matryoshka_k256_ef2.csv ADDED
The diff for this file is too large to render. See raw diff
 
ef2/config.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sae:
2
+ type: matryoshka_batch_topk
3
+ activation_dim: 4096
4
+ expansion_factor: 2
5
+ layer_id: 15
6
+ hookpoint: residual
7
+ k: 256
8
+ group_fractions:
9
+ - 0.5
10
+ - 0.25
11
+ - 0.125
12
+ - 0.0625
13
+ - 0.0625
14
+ group_weights: null
15
+ trainer:
16
+ epochs: 1
17
+ lr: null
18
+ l1_penalty: 0.1
19
+ warmup_steps: 10
20
+ sparsity_warmup_steps: 0
21
+ decay_start: null
22
+ resample_steps: null
23
+ seed: 42
24
+ device: cuda:0
25
+ log_every_n_steps: 1000
26
+ logger_type: mlflow
27
+ validate: false
28
+ auxk_alpha: 0.03125
29
+ threshold_beta: 0.999
30
+ threshold_start_step: 1000
31
+ threshold_dead_features: 100000
32
+ data:
33
+ dataset_names:
34
+ - mimic_findings_temporal
35
+ activations_type: per_token
36
+ num_workers: 18
37
+ batch_size: 8192
38
+ val_samples: 512000
39
+ train_samples: null
40
+ norm_act: true
41
+ norm_to_sqrt_act_dim: false
42
+ input_unit_norm: false
43
+ filter_dict: null
ef2/layer15_res_matryoshka_k256_ef2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed20976e0043d310f219473b2f20e98c84a4281a5d418a4ab822501fb79e605e
3
+ size 268487386
ef4/autointerp_layer15_res_matryoshka_k256_ef4.csv ADDED
The diff for this file is too large to render. See raw diff
 
ef4/config.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sae:
2
+ type: matryoshka_batch_topk
3
+ activation_dim: 4096
4
+ expansion_factor: 4
5
+ layer_id: 15
6
+ hookpoint: residual
7
+ k: 256
8
+ group_fractions:
9
+ - 0.5
10
+ - 0.25
11
+ - 0.125
12
+ - 0.0625
13
+ - 0.0625
14
+ group_weights: null
15
+ trainer:
16
+ epochs: 1
17
+ lr: null
18
+ l1_penalty: 0.1
19
+ warmup_steps: 10
20
+ sparsity_warmup_steps: 0
21
+ decay_start: null
22
+ resample_steps: null
23
+ seed: 42
24
+ device: cuda:0
25
+ log_every_n_steps: 1000
26
+ logger_type: mlflow
27
+ validate: false
28
+ auxk_alpha: 0.03125
29
+ threshold_beta: 0.999
30
+ threshold_start_step: 1000
31
+ threshold_dead_features: 100000
32
+ data:
33
+ dataset_names:
34
+ - mimic_findings_temporal
35
+ activations_type: per_token
36
+ num_workers: 18
37
+ batch_size: 8192
38
+ val_samples: 512000
39
+ train_samples: null
40
+ norm_act: true
41
+ norm_to_sqrt_act_dim: false
42
+ input_unit_norm: false
43
+ filter_dict: null
ef4/layer15_res_matryoshka_k256_ef4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58bde75f2707637c25bd760007647181875068f0bdffe4752fdeeb71a3034ad5
3
+ size 536955610
ef8/config.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ sae:
2
+ type: matryoshka_batch_topk
3
+ activation_dim: 4096
4
+ expansion_factor: 8
5
+ layer_id: 15
6
+ hookpoint: residual
7
+ k: 256
8
+ group_fractions:
9
+ - 0.5
10
+ - 0.25
11
+ - 0.125
12
+ - 0.0625
13
+ - 0.0625
14
+ group_weights: null
15
+ trainer:
16
+ epochs: 1
17
+ lr: null
18
+ l1_penalty: 0.1
19
+ warmup_steps: 10
20
+ sparsity_warmup_steps: 0
21
+ decay_start: null
22
+ resample_steps: null
23
+ seed: 42
24
+ device: cuda:0
25
+ log_every_n_steps: 1000
26
+ logger_type: mlflow
27
+ validate: false
28
+ auxk_alpha: 0.03125
29
+ threshold_beta: 0.999
30
+ threshold_start_step: 1000
31
+ threshold_dead_features: 100000
32
+ data:
33
+ dataset_names:
34
+ - mimic_findings_temporal
35
+ activations_type: per_token
36
+ num_workers: 18
37
+ batch_size: 8192
38
+ val_samples: 512000
39
+ train_samples: null
40
+ norm_act: true
41
+ norm_to_sqrt_act_dim: false
42
+ input_unit_norm: false
43
+ filter_dict: null
ef8/layer15_res_matryoshka_k256_ef8.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81a4f7a3b97b5c31ee1e1b53e791fd50a77cbdfed44e274abbfba3138eed4405
3
+ size 1073892058