speechbrain
/

asr-transformer-aishell

Automatic Speech Recognition

Model card · Files and versions

Titouan committed on Apr 29, 2021

Commit

9de803c

·

1 Parent(s): 6c63eca

EncoderDecoderASR update

Files changed (1) hide show

hyperparams.yaml +14 -6

hyperparams.yaml CHANGED Viewed

@@ -86,15 +86,19 @@ tokenizer: !new:sentencepiece.SentencePieceProcessor
 asr_model: !new:torch.nn.ModuleList
     - [!ref <CNN>, !ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
-modules:
-    compute_features: !ref <compute_features>
-    pre_transformer: !ref <CNN>
     transformer: !ref <Transformer>
-    asr_model: !ref <asr_model>
     normalize: !ref <normalize>
-    beam_searcher: !ref <beam_searcher>
-beam_searcher: !new:speechbrain.decoders.S2STransformerBeamSearch
     modules: [!ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
     bos_index: !ref <bos_index>
     eos_index: !ref <eos_index>
@@ -106,6 +110,10 @@ beam_searcher: !new:speechbrain.decoders.S2STransformerBeamSearch
     using_eos_threshold: False
     length_normalization: True
 log_softmax: !new:torch.nn.LogSoftmax
     dim: -1

 asr_model: !new:torch.nn.ModuleList
     - [!ref <CNN>, !ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
+# Here, we extract the encoder from the Transformer model
+Tencoder: !new:speechbrain.lobes.models.transformer.TransformerASR.EncoderWrapper
     transformer: !ref <Transformer>
+# We compose the inference (encoder) pipeline.
+encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
+    input_shape: [null, null, !ref <n_mels>]
+    compute_features: !ref <compute_features>
     normalize: !ref <normalize>
+    cnn: !ref <CNN>
+    transformer_encoder: !ref <Tencoder>
+decoder: !new:speechbrain.decoders.S2STransformerBeamSearch
     modules: [!ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
     bos_index: !ref <bos_index>
     eos_index: !ref <eos_index>
     using_eos_threshold: False
     length_normalization: True
+modules:
+    encoder: !ref <encoder>
+    decoder: !ref <decoder>
 log_softmax: !new:torch.nn.LogSoftmax
     dim: -1