Add files using upload-large-folder tool
- config.json +1 -1
- layers.16.mlp/cfg.json +1 -0
- layers.16.mlp/sae.safetensors +3 -0
- layers.17.mlp/cfg.json +1 -0
- layers.17.mlp/sae.safetensors +3 -0
- layers.18.mlp/cfg.json +1 -0
- layers.18.mlp/sae.safetensors +3 -0
- layers.19.mlp/cfg.json +1 -0
- layers.19.mlp/sae.safetensors +3 -0
- layers.20.mlp/cfg.json +1 -0
- layers.20.mlp/sae.safetensors +3 -0
- layers.21.mlp/cfg.json +1 -0
- layers.21.mlp/sae.safetensors +3 -0
- layers.22.mlp/cfg.json +1 -0
- layers.22.mlp/sae.safetensors +3 -0
- layers.23.mlp/cfg.json +1 -0
- layers.23.mlp/sae.safetensors +3 -0
- layers.24.mlp/cfg.json +1 -0
- layers.24.mlp/sae.safetensors +3 -0
- layers.25.mlp/cfg.json +1 -0
- layers.25.mlp/sae.safetensors +3 -0
- layers.26.mlp/cfg.json +1 -0
- layers.26.mlp/sae.safetensors +3 -0
- layers.27.mlp/cfg.json +1 -0
- layers.27.mlp/sae.safetensors +3 -0
- lr_scheduler.pt +1 -1
- optimizer.pt +1 -1
- state.pt +1 -1
config.json
CHANGED
@@ -1 +1 @@
-{"sae": {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 2, "grad_acc_steps": 4, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.
+{"sae": {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 2, "grad_acc_steps": 4, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.16.mlp", "layers.17.mlp", "layers.18.mlp", "layers.19.mlp", "layers.20.mlp", "layers.21.mlp", "layers.22.mlp", "layers.23.mlp", "layers.24.mlp", "layers.25.mlp", "layers.26.mlp", "layers.27.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": true, "save_every": 1000, "log_to_wandb": true, "run_name": "sae-R1-1.5B-65k-part-2", "wandb_log_frequency": 1, "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "subset": null, "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
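The per-layer checkpoint sizes below can be sanity-checked against these hyperparameters. A minimal sketch, assuming fp32 weights and a conventional TopK SAE parameterization (encoder weight and bias, decoder weight and bias); the tensor layout is an assumption, not read from the checkpoints:

# Rough size check for one SAE checkpoint (assumed fp32 TopK SAE layout).
d_in, num_latents = 1536, 65536              # from the per-layer cfg.json files
bytes_per = 4                                # fp32
encoder_w = num_latents * d_in * bytes_per
decoder_w = num_latents * d_in * bytes_per
encoder_b = num_latents * bytes_per
decoder_b = d_in * bytes_per
total = encoder_w + decoder_w + encoder_b + decoder_b
print(total)  # 805_574_656 bytes; close to the 805574992-byte sae.safetensors
              # files below, the small remainder plausibly being the safetensors header.
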
layers.16.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.16.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2921010ec0b1aa724b4a24de8f9c5a602d3e5f98e0b120693ebdd2cebe461b69
+size 805574992

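A hedged sketch of fetching and opening one of these per-layer checkpoints with huggingface_hub and safetensors; the repo id below is a placeholder for wherever this commit lives, and the tensor key names depend on the training library's export format:

import json
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

repo_id = "<this-repo-id>"   # placeholder: the repository containing these files
layer = "layers.16.mlp"

cfg_path = hf_hub_download(repo_id, f"{layer}/cfg.json")
sae_path = hf_hub_download(repo_id, f"{layer}/sae.safetensors")

cfg = json.load(open(cfg_path))   # {"expansion_factor": 64, ..., "d_in": 1536}
tensors = load_file(sae_path)     # dict of parameter name -> torch.Tensor
print(cfg["k"], cfg["num_latents"], list(tensors.keys()))
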
layers.17.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.17.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3152204a6fc7b05e5dfd816c01e658184130191fed9990ed883a755d657c25eb
+size 805574992

layers.18.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.18.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5ecd52999057b5f233b89ea546c647dd33c2c06d33ad676e097c24840748341
+size 805574992

layers.19.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.19.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fdab3ed291224e8be8b42937e30ee5d795b5c627e4753e729794a0f0c429748
+size 805574992

layers.20.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.20.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06914443960bae4e22d8054c8c9843b4d190666aa34004031d78d12a59dbf74e
+size 805574992

layers.21.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.21.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db22d6add0b03d252d673b5075b1f45dfe3c9969870075c65004aaa76a1945a9
+size 805574992

layers.22.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.22.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:446cd67ccdc7464220d0852b6c762dcf00c6bb036426d97c1ea09d6fe0b84615
+size 805574992

layers.23.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.23.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e29e4308a5da1710a5026b86e92de079f2cc5614cd54cf11605f19838602d59
+size 805574992

layers.24.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.24.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17eb7656c4930ac05f90684fc5a29ee8aee7808a542951c101a2630733945952
+size 805574992

layers.25.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.25.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b9c7ceb0df2960916222b89a32b1bed1b69f6f8de1a548c9667e2e1f3b5d1c6
+size 805574992

layers.26.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.26.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a169cd2d9457274b2f6a8c57cc9cbd0998f554aa57d30a63473c661eaaee7c8
+size 805574992

layers.27.mlp/cfg.json
ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}

layers.27.mlp/sae.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4ac627d64906a2aa1f508fcf4d2d25d1eeb494d4dc97613e7caaf58730b617a
+size 805574992

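The per-layer cfg.json files above fully describe each SAE's architecture: d_in=1536, num_latents=65536, k=32 active latents, unit-norm decoder directions ("normalize_decoder": true), no multi-TopK and no skip connection. A minimal illustrative TopK SAE forward pass under those hyperparameters follows; it is a sketch of what the fields mean, not the training library's exact implementation, and the parameter names and initialization are assumptions:

import torch

d_in, num_latents, k = 1536, 65536, 32       # from the per-layer cfg.json

# Assumed parameterization: linear encoder, linear decoder with unit-norm rows.
W_enc = torch.randn(num_latents, d_in) / d_in**0.5
b_enc = torch.zeros(num_latents)
W_dec = torch.nn.functional.normalize(torch.randn(num_latents, d_in), dim=-1)  # normalize_decoder
b_dec = torch.zeros(d_in)

def topk_sae(x):
    pre = (x - b_dec) @ W_enc.T + b_enc                      # encoder pre-activations
    vals, idx = pre.topk(k, dim=-1)                          # keep only the top-k latents
    acts = torch.zeros_like(pre).scatter_(-1, idx, torch.relu(vals))
    return acts @ W_dec + b_dec                              # reconstruction

x = torch.randn(4, d_in)                                     # fake batch of MLP activations
print(topk_sae(x).shape)                                     # torch.Size([4, 1536])
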
lr_scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:35239b81b5e8c4e9c69dc3489ee4191e592bf034d435fe54365625a9f12c0de0
 size 1076

optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2eb104c6e80831353a88872cb7641d0fac70e0d35c7e301a2e068be7b1f3c7bf
 size 818191552

state.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3ddc38ee274cdf2fffb433b4e8d89bf769002646c6e3ce4ad14406afb8a958cb
 size 1049996
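These training-state files, like the safetensors above, are stored as Git LFS pointers (version line, sha256 oid, byte size). A small sketch, assuming a locally resolved copy of the file, that checks it against the pointer shown in this commit:

import hashlib

def verify_lfs(path, expected_oid, expected_size):
    # Compare a downloaded file against its Git LFS pointer (sha256 + size).
    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

# Example for lr_scheduler.pt, using the oid and size from this commit:
ok = verify_lfs("lr_scheduler.pt",
                "35239b81b5e8c4e9c69dc3489ee4191e592bf034d435fe54365625a9f12c0de0",
                1076)
print(ok)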