{
  "model_path": "trained_gpt2_clts/tied-per-target-skip/tied_per_target_skip_global_batchtopk_jumprelu",
  "config": {
    "num_features": 122880,
    "num_layers": 12,
    "d_model": 768,
    "model_name": null,
    "normalization_method": "mean_std",
    "activation_fn": "jumprelu",
    "jumprelu_threshold": 0.0,
    "batchtopk_k": null,
    "batchtopk_straight_through": false,
    "topk_k": null,
    "topk_straight_through": true,
    "topk_mode": "global",
    "two_stage_batchtopk": false,
    "two_stage_topk": false,
    "clt_dtype": null,
    "expected_input_dtype": null,
    "mlp_input_template": null,
    "mlp_output_template": null,
    "tl_input_template": null,
    "tl_output_template": null,
    "decoder_tying": "per_target",
    "enable_feature_offset": false,
    "enable_feature_scale": false,
    "skip_connection": true
  },
  "layer_stats": {
    "0": {
      "avg_l0": 4.6845703125,
      "max_l0": 45.0
    },
    "1": {
      "avg_l0": 3.1630859375,
      "max_l0": 56.0
    },
    "2": {
      "avg_l0": 6.1865234375,
      "max_l0": 232.0
    },
    "3": {
      "avg_l0": 4.947265625,
      "max_l0": 26.0
    },
    "4": {
      "avg_l0": 5.6328125,
      "max_l0": 83.0
    },
    "5": {
      "avg_l0": 4.9423828125,
      "max_l0": 189.0
    },
    "6": {
      "avg_l0": 8.0615234375,
      "max_l0": 240.0
    },
    "7": {
      "avg_l0": 9.8701171875,
      "max_l0": 210.0
    },
    "8": {
      "avg_l0": 12.326171875,
      "max_l0": 332.0
    },
    "9": {
      "avg_l0": 21.96875,
      "max_l0": 624.0
    },
    "10": {
      "avg_l0": 32.90234375,
      "max_l0": 306.0
    },
    "11": {
      "avg_l0": 44.5126953125,
      "max_l0": 521.0
    }
  }
}