{ "model_path": "trained_gpt2_clts/tied-per-target-skip/tied_per_target_skip_global_batchtopk_jumprelu", "config": { "num_features": 122880, "num_layers": 12, "d_model": 768, "model_name": null, "normalization_method": "mean_std", "activation_fn": "jumprelu", "jumprelu_threshold": 0.0, "batchtopk_k": null, "batchtopk_straight_through": false, "topk_k": null, "topk_straight_through": true, "topk_mode": "global", "two_stage_batchtopk": false, "two_stage_topk": false, "clt_dtype": null, "expected_input_dtype": null, "mlp_input_template": null, "mlp_output_template": null, "tl_input_template": null, "tl_output_template": null, "decoder_tying": "per_target", "enable_feature_offset": false, "enable_feature_scale": false, "skip_connection": true }, "layer_stats": { "0": { "avg_l0": 4.6845703125, "max_l0": 45.0 }, "1": { "avg_l0": 3.1630859375, "max_l0": 56.0 }, "2": { "avg_l0": 6.1865234375, "max_l0": 232.0 }, "3": { "avg_l0": 4.947265625, "max_l0": 26.0 }, "4": { "avg_l0": 5.6328125, "max_l0": 83.0 }, "5": { "avg_l0": 4.9423828125, "max_l0": 189.0 }, "6": { "avg_l0": 8.0615234375, "max_l0": 240.0 }, "7": { "avg_l0": 9.8701171875, "max_l0": 210.0 }, "8": { "avg_l0": 12.326171875, "max_l0": 332.0 }, "9": { "avg_l0": 21.96875, "max_l0": 624.0 }, "10": { "avg_l0": 32.90234375, "max_l0": 306.0 }, "11": { "avg_l0": 44.5126953125, "max_l0": 521.0 } } }