{
  "num_features": 32768,
  "num_layers": 12,
  "d_model": 768,
  "activation_fn": "topk",
  "topk_k": 16,
  "topk_straight_through": true,
  "topk_mode": "per_layer",
  "decoder_tying": "none",
  "enable_feature_offset": false,
  "enable_feature_scale": false,
  "skip_connection": false,
  "normalization_method": "mean_std"
}