{ "model_path": "trained_gpt2_clts/untied-batchtopk/untied_global_batchtopk_jumprelu", "config": { "num_features": 32768, "num_layers": 12, "d_model": 768, "model_name": null, "normalization_method": "mean_std", "activation_fn": "jumprelu", "jumprelu_threshold": 0.0, "batchtopk_k": null, "batchtopk_straight_through": false, "topk_k": null, "topk_straight_through": true, "topk_mode": "global", "two_stage_batchtopk": false, "two_stage_topk": false, "clt_dtype": null, "expected_input_dtype": null, "mlp_input_template": null, "mlp_output_template": null, "tl_input_template": null, "tl_output_template": null, "decoder_tying": "none", "enable_feature_offset": false, "enable_feature_scale": false, "skip_connection": false }, "layer_stats": { "0": { "avg_l0": 6.1376953125, "max_l0": 36.0 }, "1": { "avg_l0": 3.8759765625, "max_l0": 51.0 }, "2": { "avg_l0": 6.123046875, "max_l0": 116.0 }, "3": { "avg_l0": 5.4716796875, "max_l0": 36.0 }, "4": { "avg_l0": 7.1689453125, "max_l0": 31.0 }, "5": { "avg_l0": 8.97265625, "max_l0": 52.0 }, "6": { "avg_l0": 14.6796875, "max_l0": 147.0 }, "7": { "avg_l0": 18.650390625, "max_l0": 116.0 }, "8": { "avg_l0": 24.76171875, "max_l0": 130.0 }, "9": { "avg_l0": 23.3310546875, "max_l0": 81.0 }, "10": { "avg_l0": 28.69140625, "max_l0": 130.0 }, "11": { "avg_l0": 40.8994140625, "max_l0": 330.0 } } }