ctigges's picture
Upload 29 files
25c9670 verified
{
"model_path": "trained_gpt2_clts/tied-per-target-skip/tied_per_target_skip_global_batchtopk_jumprelu",
"config": {
"num_features": 122880,
"num_layers": 12,
"d_model": 768,
"model_name": null,
"normalization_method": "mean_std",
"activation_fn": "jumprelu",
"jumprelu_threshold": 0.0,
"batchtopk_k": null,
"batchtopk_straight_through": false,
"topk_k": null,
"topk_straight_through": true,
"topk_mode": "global",
"two_stage_batchtopk": false,
"two_stage_topk": false,
"clt_dtype": null,
"expected_input_dtype": null,
"mlp_input_template": null,
"mlp_output_template": null,
"tl_input_template": null,
"tl_output_template": null,
"decoder_tying": "per_target",
"enable_feature_offset": false,
"enable_feature_scale": false,
"skip_connection": true
},
"layer_stats": {
"0": {
"avg_l0": 4.6845703125,
"max_l0": 45.0
},
"1": {
"avg_l0": 3.1630859375,
"max_l0": 56.0
},
"2": {
"avg_l0": 6.1865234375,
"max_l0": 232.0
},
"3": {
"avg_l0": 4.947265625,
"max_l0": 26.0
},
"4": {
"avg_l0": 5.6328125,
"max_l0": 83.0
},
"5": {
"avg_l0": 4.9423828125,
"max_l0": 189.0
},
"6": {
"avg_l0": 8.0615234375,
"max_l0": 240.0
},
"7": {
"avg_l0": 9.8701171875,
"max_l0": 210.0
},
"8": {
"avg_l0": 12.326171875,
"max_l0": 332.0
},
"9": {
"avg_l0": 21.96875,
"max_l0": 624.0
},
"10": {
"avg_l0": 32.90234375,
"max_l0": 306.0
},
"11": {
"avg_l0": 44.5126953125,
"max_l0": 521.0
}
}
}