# Configuration for default_model.AstralQuantizer and its four sub-modules
# (encoder, quantizer, decoder, asr_decoder).
#
# Each `_target_` is a dotted path to the class that the enclosing mapping
# configures. The sibling keys are presumably passed to that class as keyword
# arguments by a Hydra-style `instantiate` call -- confirm against the loader.
_target_: default_model.AstralQuantizer
tokenizer_name: "openai/whisper-small"
ssl_model_name: "facebook/hubert-large-ll60k"
ssl_output_layer: 18

# Encoder stage. `dim` (512) is the same value used by quantizer.dim,
# decoder.dim and asr_decoder.in_channels below; keep them in sync.
encoder:
  _target_: modules.convnext.ConvNeXtV2Stage
  dim: 512
  num_blocks: 12
  intermediate_dim: 1536
  dilation: 1
  # NOTE(review): presumably the feature width of `ssl_model_name` -- confirm.
  input_dim: 1024

# Quantizer operating on the 512-wide encoder features.
quantizer:
  _target_: modules.bsq.BinarySphericalQuantize
  codebook_size: 32
  dim: 512
  entropy_loss_weight: 0.1
  diversity_gamma: 1.0
  spherical: true
  enable_entropy_loss: true
  soft_entropy_loss: true

# Decoder stage. Uses the same class and block sizes as `encoder`;
# `output_dim` (1024) equals encoder.input_dim.
decoder:
  _target_: modules.convnext.ConvNeXtV2Stage
  dim: 512
  num_blocks: 12
  intermediate_dim: 1536
  dilation: 1
  output_dim: 1024
  # NOTE(review): nesting was reconstructed from key order (this key follows
  # `output_dim` and precedes `asr_decoder`); confirm that `gin_channels`
  # belongs to the decoder rather than to the top-level model.
  gin_channels: 192

# ASR decoder head. `in_channels` (512) equals the encoder/quantizer `dim`.
asr_decoder:
  _target_: modules.asr_decoder.ASRDecoder
  hidden_dim: 768
  num_heads: 12
  depth: 12
  block_size: 4096
  in_channels: 512
  # NOTE(review): `n_vocab`, `bos_id` and `eos_id` presumably have to agree
  # with the tokenizer named in `tokenizer_name` -- verify before changing.
  n_vocab: 51866
  bos_id: 50528
  eos_id: 50527
  dropout_rate: 0.0
  attn_dropout_rate: 0.0