# Repository: qwen3-14b-transcoders, file: wandb-config.yaml
# Uploaded by mwhanna in commit 6a1d56b (verified): "Upload wandb-config.yaml"
---
# Metadata section keyed `_wandb` (presumably written by the wandb client;
# confirm). The numeric-string keys under `m` and `t` are opaque codes that
# this file does not define -- look them up in wandb before editing them.
_wandb:
  value:
    # Version strings are quoted so they can never be re-typed as numbers.
    cli_version: "0.19.11"
    # One entry per GPU-memory metric; "1" holds the metric name.
    m:
      - "1": gpu/memory_allocated_gb
        "6":
          - 3
        "7": []
      - "1": gpu/max_memory_allocated_gb
        "6":
          - 3
        "7": []
      - "1": gpu/memory_reserved_gb
        "6":
          - 3
        "7": []
    python_version: "3.11.10"
    # "4" repeats python_version; "5" and "12" repeat cli_version.
    # NOTE(review): "6" looks like another library version -- confirm which.
    t:
      "1":
        - 1
        - 11
        - 49
        - 51
        - 55
        - 71
      "2":
        - 1
        - 11
        - 49
        - 51
        - 55
        - 71
      "3":
        - 2
        - 7
        - 13
        - 16
        - 23
        - 55
        - 61
      "4": "3.11.10"
      "5": "0.19.11"
      "6": "4.52.4"
      "8":
        - 5
      "12": "0.19.11"
      "13": linux-x86_64
# Run hyperparameters. Each setting is a mapping with a single `value` key,
# so `value:` must stay nested under its own setting name.
act_fn:
  value: relu
batch_size:
  value: 8192
before_ln:
  value: false
c_coeff:
  value: 4
cooldown_start_frac:
  value: 0.8
# d_feature (163840) is exactly 32 x d_model (5120).
d_feature:
  value: 163840
d_model:
  value: 5120
# Quoted because the value contains a colon.
device:
  value: "cuda:0"
# initial_lr and lr carry the same value in this run.
initial_lr:
  value: 0.0002
layer_idx:
  value: 0
lr:
  value: 0.0002
min_lr_ratio:
  value: 0
model_name:
  value: Qwen/Qwen3-14B
model_type:
  value: qwen
n_batches:
  value: 277
n_grad_steps:
  value: 4
n_steps:
  value: 122070
# Written with an explicit ".0": YAML 1.1 loaders (e.g. PyYAML) read the
# dot-less form `6e-05` as a string rather than a float.
preact_coeff:
  value: 6.0e-05
shuffle_buffer_batches:
  value: 32
skip_connections:
  value: false
sparsity_coeff_final:
  value: 8
x_scale:
  value: 1
y_scale:
  value: 1