{ "num_features": 32768, "num_layers": 12, "d_model": 768, "model_name": "gpt2", "normalization_method": "mean_std", "activation_fn": "jumprelu", "jumprelu_threshold": 0.0, "batchtopk_k": null, "batchtopk_straight_through": true, "topk_straight_through": true, "two_stage_batchtopk": false, "two_stage_topk": false, "clt_dtype": null, "expected_input_dtype": "float16", "mlp_input_template": null, "mlp_output_template": null, "tl_input_template": null, "tl_output_template": null, "decoder_tying": "none", "enable_feature_offset": false, "enable_feature_scale": false, "skip_connection": false }