luciaquirke committed · Commit 072fdd1 · verified · 1 Parent(s): e942c28

Add files using upload-large-folder tool

config.json CHANGED
@@ -1 +1 @@
-{"sae": {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 2, "grad_acc_steps": 4, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0.mlp", "layers.1.mlp", "layers.2.mlp", "layers.3.mlp", "layers.4.mlp", "layers.5.mlp", "layers.6.mlp", "layers.7.mlp", "layers.8.mlp", "layers.9.mlp", "layers.10.mlp", "layers.11.mlp", "layers.12.mlp", "layers.13.mlp", "layers.14.mlp", "layers.15.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": true, "save_every": 1000, "log_to_wandb": true, "run_name": "sae-DeepSeek-R1-Distill-Qwen-1.5B-65k", "wandb_log_frequency": 1, "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "subset": null, "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
+{"sae": {"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false}, "batch_size": 2, "grad_acc_steps": 4, "micro_acc_steps": 1, "lr": null, "lr_warmup_steps": 1000, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["layers.16.mlp", "layers.17.mlp", "layers.18.mlp", "layers.19.mlp", "layers.20.mlp", "layers.21.mlp", "layers.22.mlp", "layers.23.mlp", "layers.24.mlp", "layers.25.mlp", "layers.26.mlp", "layers.27.mlp"], "init_seeds": [0], "layers": [], "layer_stride": 1, "transcode": false, "distribute_modules": true, "save_every": 1000, "log_to_wandb": true, "run_name": "sae-R1-1.5B-65k-part-2", "wandb_log_frequency": 1, "model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "dataset": "EleutherAI/fineweb-edu-dedup-10b", "subset": null, "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": null, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 48}
layers.16.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.16.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2921010ec0b1aa724b4a24de8f9c5a602d3e5f98e0b120693ebdd2cebe461b69
+size 805574992
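Each hookpoint directory pairs a one-line cfg.json with an LFS-tracked sae.safetensors checkpoint; the remaining layers below follow the identical pattern. A short sketch for inspecting one checkpoint once the LFS blobs are materialized locally (the tensor names inside the file are not visible in this diff, so the code only lists whatever is there):

```python
import json
from safetensors import safe_open

# Per-hookpoint SAE hyperparameters.
with open("layers.16.mlp/cfg.json") as f:
    sae_cfg = json.load(f)
print(sae_cfg["d_in"], sae_cfg["num_latents"], sae_cfg["k"])  # 1536 65536 32

# Enumerate tensors without loading the whole ~806 MB file into memory.
with safe_open("layers.16.mlp/sae.safetensors", framework="pt") as st:
    for name in st.keys():
        print(name, st.get_slice(name).get_shape())
```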
layers.17.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.17.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3152204a6fc7b05e5dfd816c01e658184130191fed9990ed883a755d657c25eb
+size 805574992
layers.18.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.18.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5ecd52999057b5f233b89ea546c647dd33c2c06d33ad676e097c24840748341
+size 805574992
layers.19.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.19.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fdab3ed291224e8be8b42937e30ee5d795b5c627e4753e729794a0f0c429748
+size 805574992
layers.20.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.20.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06914443960bae4e22d8054c8c9843b4d190666aa34004031d78d12a59dbf74e
+size 805574992
layers.21.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.21.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db22d6add0b03d252d673b5075b1f45dfe3c9969870075c65004aaa76a1945a9
+size 805574992
layers.22.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.22.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:446cd67ccdc7464220d0852b6c762dcf00c6bb036426d97c1ea09d6fe0b84615
+size 805574992
layers.23.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.23.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e29e4308a5da1710a5026b86e92de079f2cc5614cd54cf11605f19838602d59
+size 805574992
layers.24.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.24.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17eb7656c4930ac05f90684fc5a29ee8aee7808a542951c101a2630733945952
+size 805574992
layers.25.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.25.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b9c7ceb0df2960916222b89a32b1bed1b69f6f8de1a548c9667e2e1f3b5d1c6
+size 805574992
layers.26.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.26.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a169cd2d9457274b2f6a8c57cc9cbd0998f554aa57d30a63473c661eaaee7c8
+size 805574992
layers.27.mlp/cfg.json ADDED
@@ -0,0 +1 @@
+{"expansion_factor": 64, "normalize_decoder": true, "num_latents": 65536, "k": 32, "multi_topk": false, "skip_connection": false, "d_in": 1536}
layers.27.mlp/sae.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4ac627d64906a2aa1f508fcf4d2d25d1eeb494d4dc97613e7caaf58730b617a
+size 805574992
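All twelve checkpoints above are exactly 805,574,992 bytes, which matches an fp32 TopK SAE of this shape to within the safetensors header. A back-of-envelope check, assuming the usual encoder/decoder-plus-biases parameterization rather than any particular tensor layout:

```python
d_in, n_latents = 1536, 65536  # from the cfg.json files above

# W_enc (n_latents x d_in) + W_dec (n_latents x d_in) + b_enc + b_dec
params = 2 * n_latents * d_in + n_latents + d_in
print(4 * params)  # fp32 bytes: 805574656; the ~336-byte gap is the safetensors header
```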
lr_scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf208a9524911862838668e1825fa34dc5e29ef9c92e1eb7f9258ce6cad96710
+oid sha256:35239b81b5e8c4e9c69dc3489ee4191e592bf034d435fe54365625a9f12c0de0
 size 1076
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8965b87655e439a0800021d7d8c4a6de610a92c10de1cb6362844d62aa4b3a5f
+oid sha256:2eb104c6e80831353a88872cb7641d0fac70e0d35c7e301a2e068be7b1f3c7bf
 size 818191552
state.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:edaae6a6dd3eecca5e933b467c1d0563006870a40881915dbf8ecd7a694420e5
+oid sha256:3ddc38ee274cdf2fffb433b4e8d89bf769002646c6e3ce4ad14406afb8a958cb
 size 1049996
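lr_scheduler.pt, optimizer.pt, and state.pt carry the scheduler, optimizer, and trainer state (presumably what the config's `resume` flag would pick up); like the checkpoints, they are stored as Git LFS pointers, so the repo holds only the version/oid/size triplets shown above. The oid is the SHA-256 of the actual blob, which makes a downloaded copy easy to verify; a minimal sketch, run against the real file rather than the pointer:

```python
import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """SHA-256 of a file's contents, streamed in 1 MiB chunks."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            h.update(block)
    return h.hexdigest()

# Should match the oid in the state.pt pointer above.
print(lfs_oid("state.pt") == "3ddc38ee274cdf2fffb433b4e8d89bf769002646c6e3ce4ad14406afb8a958cb")
```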