{ "dim": 3072, "n_layers": 30, "head_dim": 128, "hidden_dim": 8192, "n_heads": 32, "n_kv_heads": 8, "rope_theta": 100000000.0, "norm_eps": 1e-05, "vocab_size": 131072, "max_position_embeddings": 32768, "multimodal": { "whisper_model_args": { "encoder_args": { "dim": 1280, "n_layers": 32, "head_dim": 64, "hidden_dim": 5120, "n_heads": 20, "vocab_size": 51866, "max_source_positions": 1500, "audio_encoding_args": { "sampling_rate": 16000, "num_mel_bins": 128, "hop_length": 160, "window_size": 400 } }, "downsample_args": { "downsample_factor": 4 } } } }