brdhaker3 committed on
Commit 1da6f48 · verified · 1 Parent(s): e94f53a

Delete model/1234/save/CKPT+2024-05-27+00-52-30+00/train.yaml

model/1234/save/CKPT+2024-05-27+00-52-30+00/train.yaml DELETED
@@ -1,187 +0,0 @@
- # ################################
- # Model: wav2vec2 + DNN + CTC
- # Augmentation: SpecAugment
- # Authors: Titouan Parcollet 2021
- # ################################
-
- # Seed needs to be set at top of yaml, before objects with parameters are made
- seed: 1234
- __set_seed: !!python/object/apply:torch.manual_seed [!ref <seed>]
- output_folder: !ref model/<seed>
- wer_file: !ref <output_folder>/wer.txt
- save_folder: !ref <output_folder>/save
- train_log: !ref <output_folder>/train_log.txt
-
- # URL for the biggest LeBenchmark French wav2vec2 model.
- wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
-
- # Data files
- data_folder: /path/to/data # e.g., /localscratch/cv-corpus-5.1-2020-06-22/fr
- train_tsv_file: !ref <data_folder>/train.tsv # Standard CommonVoice .tsv files
- dev_tsv_file: !ref <data_folder>/dev.tsv # Standard CommonVoice .tsv files
- test_tsv_file: !ref <data_folder>/test.tsv # Standard CommonVoice .tsv files
- accented_letters: True
- language: fr # use 'it' for Italian, 'rw' for Kinyarwanda, 'en' for English
- train_csv: Data/train_wavs/train.csv
- valid_csv: Data/dev_wavs/dev.csv
- test_csv:
-    - Data/test_wavs/test.csv
-
- skip_prep: True # Skip data preparation
-
- tokenizer: !new:speechbrain.dataio.encoder.CTCTextEncoder
-
- encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
-    wav2vec2: !ref <wav2vec2>
-    enc: !ref <enc>
-    ctc_lin: !ref <ctc_lin>
-    log_softmax: !ref <log_softmax>
-
- decoding_function: !name:speechbrain.decoders.ctc_greedy_decode
-    blank_id: !ref <blank_index>
-
- use_language_modelling: True
- ngram_lm_path: languageModel.arpa
-
- # We remove utterances longer than 10s in the train/dev/test sets, as
- # longer sentences certainly correspond to "open microphones".
- avoid_if_longer_than: 10.0
- avoid_if_shorter_than: 1.2
-
-
- # Training parameters
- number_of_epochs: 12
- lr: 1.0
- lr_wav2vec: 0.0001
- sorting: ascending
- auto_mix_prec: False
- sample_rate: 16000
- ckpt_interval_minutes: 30 # save checkpoint every N min
-
- # With data_parallel batch_size is split into N jobs
- # With DDP batch_size is multiplied by N jobs
- # Must be 6 per GPU to fit 16GB of VRAM
- batch_size: 10
- test_batch_size: 4
-
- dataloader_options:
-    batch_size: !ref <batch_size>
-    num_workers: 6
- test_dataloader_options:
-    batch_size: !ref <test_batch_size>
-    num_workers: 6
-
- # BPE parameters
- token_type: char # ["unigram", "bpe", "char"]
- character_coverage: 1.0
-
- # Model parameters
- # activation: !name:torch.nn.LeakyReLU
- wav2vec_output_dim: 1024
- dnn_neurons: 1024
- freeze_wav2vec: False
- freeze_feature_extractor: True
- dropout: 0.15
- warmup_steps: 500 # The wav2vec2 model isn't updated for this number of steps
-
- # Outputs
- output_neurons: 40 # BPE size, index(blank/eos/bos) = 0
-
- # Decoding parameters
- # Be sure that the bos and eos indices match the BPE ones
- blank_index: 0
- unk_index: 1
-
- #
- # Functions and classes
- #
- epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
-    limit: !ref <number_of_epochs>
-
-
- enc: !new:speechbrain.nnet.containers.Sequential
-    input_shape: [null, null, !ref <wav2vec_output_dim>]
-    linear1: !name:speechbrain.nnet.linear.Linear
-       n_neurons: !ref <dnn_neurons>
-       bias: True
-    bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
-    activation: !new:torch.nn.LeakyReLU
-    drop: !new:torch.nn.Dropout
-       p: !ref <dropout>
-    linear2: !name:speechbrain.nnet.linear.Linear
-       n_neurons: !ref <dnn_neurons>
-       bias: True
-    bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
-    activation2: !new:torch.nn.LeakyReLU
-    drop2: !new:torch.nn.Dropout
-       p: !ref <dropout>
-    linear3: !name:speechbrain.nnet.linear.Linear
-       n_neurons: !ref <dnn_neurons>
-       bias: True
-    bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
-    activation3: !new:torch.nn.LeakyReLU
-
- wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
-    source: wavlm-large/
-    output_norm: False
-    freeze: !ref <freeze_wav2vec>
-    freeze_feature_extractor: !ref <freeze_feature_extractor>
-    save_path: !ref <wav2vec2_folder>
-
-
- ctc_lin: !new:speechbrain.nnet.linear.Linear
-    input_size: !ref <dnn_neurons>
-    n_neurons: !ref <output_neurons>
-
- log_softmax: !new:speechbrain.nnet.activations.Softmax
-    apply_log: True
-
- ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
-    blank_index: !ref <blank_index>
-
- modules:
-    wav2vec2: !ref <wav2vec2>
-    enc: !ref <enc>
-    ctc_lin: !ref <ctc_lin>
-    encoder: !ref <encoder>
-
-
- model: !new:torch.nn.ModuleList
-    - [!ref <enc>, !ref <ctc_lin>]
-
- model_opt_class: !name:torch.optim.Adadelta
-    lr: !ref <lr>
-    rho: 0.95
-    eps: 1.e-8
-
- wav2vec_opt_class: !name:torch.optim.Adam
-    lr: !ref <lr_wav2vec>
-
- lr_annealing_model: !new:speechbrain.nnet.schedulers.NewBobScheduler
-    initial_value: !ref <lr>
-    improvement_threshold: 0.0025
-    annealing_factor: 0.8
-    patient: 0
-
- lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler
-    initial_value: !ref <lr_wav2vec>
-    improvement_threshold: 0.0025
-    annealing_factor: 0.9
-    patient: 0
-
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
-    checkpoints_dir: !ref <save_folder>
-    recoverables:
-       wav2vec2: !ref <wav2vec2>
-       model: !ref <model>
-       scheduler_model: !ref <lr_annealing_model>
-       scheduler_wav2vec: !ref <lr_annealing_wav2vec>
-       counter: !ref <epoch_counter>
-
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
-    save_file: !ref <train_log>
-
- error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
-
- cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
-    split_tokens: True
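
For reference, a SpeechBrain hyperparams file like the one deleted above is normally instantiated with hyperpyyaml, which resolves the !ref aliases and constructs the !new:/!name: objects (encoder, checkpointer, schedulers, ...). A minimal loading sketch, assuming the file has been restored locally as train.yaml and the wavlm-large/ checkpoint it references is available; the override value is illustrative, not taken from this repo:

from hyperpyyaml import load_hyperpyyaml

# Resolve !ref aliases and build the !new: objects declared in the recipe.
with open("train.yaml") as f:
    hparams = load_hyperpyyaml(f, overrides={"data_folder": "/path/to/data"})

# hparams is a plain dict: scalar values plus already-built objects.
print(hparams["output_folder"])   # -> model/1234
encoder = hparams["encoder"]      # LengthsCapableSequential: wav2vec2 + DNN + CTC head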