{
  "model_path": "trained_gpt2_clts/untied-batchtopk/untied_global_batchtopk_jumprelu",
  "config": {
    "num_features": 32768,
    "num_layers": 12,
    "d_model": 768,
    "model_name": null,
    "normalization_method": "mean_std",
    "activation_fn": "jumprelu",
    "jumprelu_threshold": 0.0,
    "batchtopk_k": null,
    "batchtopk_straight_through": false,
    "topk_k": null,
    "topk_straight_through": true,
    "topk_mode": "global",
    "two_stage_batchtopk": false,
    "two_stage_topk": false,
    "clt_dtype": null,
    "expected_input_dtype": null,
    "mlp_input_template": null,
    "mlp_output_template": null,
    "tl_input_template": null,
    "tl_output_template": null,
    "decoder_tying": "none",
    "enable_feature_offset": false,
    "enable_feature_scale": false,
    "skip_connection": false
  },
  "layer_stats": {
    "0": {
      "avg_l0": 6.1376953125,
      "max_l0": 36.0
    },
    "1": {
      "avg_l0": 3.8759765625,
      "max_l0": 51.0
    },
    "2": {
      "avg_l0": 6.123046875,
      "max_l0": 116.0
    },
    "3": {
      "avg_l0": 5.4716796875,
      "max_l0": 36.0
    },
    "4": {
      "avg_l0": 7.1689453125,
      "max_l0": 31.0
    },
    "5": {
      "avg_l0": 8.97265625,
      "max_l0": 52.0
    },
    "6": {
      "avg_l0": 14.6796875,
      "max_l0": 147.0
    },
    "7": {
      "avg_l0": 18.650390625,
      "max_l0": 116.0
    },
    "8": {
      "avg_l0": 24.76171875,
      "max_l0": 130.0
    },
    "9": {
      "avg_l0": 23.3310546875,
      "max_l0": 81.0
    },
    "10": {
      "avg_l0": 28.69140625,
      "max_l0": 130.0
    },
    "11": {
      "avg_l0": 40.8994140625,
      "max_l0": 330.0
    }
  }
}