# Source: Hugging Face repository mwhanna/qwen3-14b-transcoders
# File: qwen3-14b-transcoders/wandb-config.yaml (1.68 kB)
# Commit: 6a1d56b (verified) - "Upload wandb-config.yaml",
#   uploaded by mwhanna about 2 months before this copy was captured.

	_wandb:
	value:
	cli_version: 0.19.11
	m:
	- "1": gpu/memory_allocated_gb
	"6":
	- 3
	"7": []
	- "1": gpu/max_memory_allocated_gb
	"6":
	- 3
	"7": []
	- "1": gpu/memory_reserved_gb
	"6":
	- 3
	"7": []
	python_version: 3.11.10
	t:
	"1":
	- 1
	- 11
	- 49
	- 51
	- 55
	- 71
	"2":
	- 1
	- 11
	- 49
	- 51
	- 55
	- 71
	"3":
	- 2
	- 7
	- 13
	- 16
	- 23
	- 55
	- 61
	"4": 3.11.10
	"5": 0.19.11
	"6": 4.52.4
	"8":
	- 5
	"12": 0.19.11
	"13": linux-x86_64
	act_fn:
	value: relu
	batch_size:
	value: 8192
	before_ln:
	value: false
	c_coeff:
	value: 4
	cooldown_start_frac:
	value: 0.8
	d_feature:
	value: 163840
	d_model:
	value: 5120
	device:
	value: cuda:0
	initial_lr:
	value: 0.0002
	layer_idx:
	value: 0
	lr:
	value: 0.0002
	min_lr_ratio:
	value: 0
	model_name:
	value: Qwen/Qwen3-14B
	model_type:
	value: qwen
	n_batches:
	value: 277
	n_grad_steps:
	value: 4
	n_steps:
	value: 122070
	preact_coeff:
	value: 6e-05
	shuffle_buffer_batches:
	value: 32
	skip_connections:
	value: false
	sparsity_coeff_final:
	value: 8
	x_scale:
	value: 1
	y_scale:
	value: 1