{
  "num_features": 32768,
  "num_layers": 12,
  "d_model": 768,
  "activation_fn": "topk",
  "topk_k": 16,
  "topk_straight_through": true,
  "topk_mode": "per_layer",
  "decoder_tying": "none",
  "enable_feature_offset": false,
  "enable_feature_scale": false,
  "skip_connection": false,
  "normalization_method": "mean_std"
}