Spaces:
Running
Running
Yurii Paniv
committed on
Commit
·
eb57397
1
Parent(s):
2b6da0d
Improve training steps
Browse files
- training/.gitignore +2 -1
- training/STEPS.md +18 -4
- training/train_vits.yaml +7 -5
training/.gitignore
CHANGED
|
@@ -1 +1,2 @@
|
|
| 1 |
-
espnet
|
|
|
|
|
|
| 1 |
+
espnet
|
| 2 |
+
data
|
training/STEPS.md
CHANGED
|
@@ -2,10 +2,9 @@ Setup env
|
|
| 2 |
Link: https://espnet.github.io/espnet/installation.html
|
| 3 |
|
| 4 |
0. `sudo apt-get install cmake sox libsndfile1-dev ffmpeg`
|
| 5 |
-
1. `git clone https://github.com/espnet/espnet
|
| 6 |
-
`conda create -p ./.venv python=3.8`
|
| 7 |
-
`conda install -c anaconda cudatoolkit`
|
| 8 |
2. `cd ./espnet/tools`
|
|
|
|
| 9 |
3. `CONDA_TOOLS_DIR=$(dirname ${CONDA_EXE})/..`
|
| 10 |
./setup_anaconda.sh ${CONDA_TOOLS_DIR} espnet 3.8
|
| 11 |
5. `make`
|
|
@@ -16,4 +15,19 @@ make
|
|
| 16 |
# run training
|
| 17 |
|
| 18 |
cd ../egs2/ljspeech/tts1
|
| 19 |
-
./run.sh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
Link: https://espnet.github.io/espnet/installation.html
|
| 3 |
|
| 4 |
0. `sudo apt-get install cmake sox libsndfile1-dev ffmpeg`
|
| 5 |
+
1. `git clone --branch v.202209 https://github.com/espnet/espnet`
|
|
|
|
|
|
|
| 6 |
2. `cd ./espnet/tools`
|
| 7 |
+
./setup_anaconda.sh anaconda espnet 3.8
|
| 8 |
3. `CONDA_TOOLS_DIR=$(dirname ${CONDA_EXE})/..`
|
| 9 |
./setup_anaconda.sh ${CONDA_TOOLS_DIR} espnet 3.8
|
| 10 |
5. `make`
|
|
|
|
| 15 |
# run training
|
| 16 |
|
| 17 |
cd ../egs2/ljspeech/tts1
|
| 18 |
+
./run.sh
|
| 19 |
+
|
| 20 |
+
./run.sh \
|
| 21 |
+
--stage 2 \
|
| 22 |
+
--use_sid true \
|
| 23 |
+
--fs 22050 \
|
| 24 |
+
--n_fft 1024 \
|
| 25 |
+
--n_shift 256 \
|
| 26 |
+
--win_length null \
|
| 27 |
+
--dumpdir dump/22k \
|
| 28 |
+
--expdir exp/22k \
|
| 29 |
+
--tts_task gan_tts \
|
| 30 |
+
--feats_extract linear_spectrogram \
|
| 31 |
+
--feats_normalize none \
|
| 32 |
+
--train_config ./conf/tuning/train_vits.yaml \
|
| 33 |
+
--inference_config ./conf/tuning/decode_vits.yaml
|
training/train_vits.yaml
CHANGED
|
@@ -16,8 +16,8 @@ tts_conf:
|
|
| 16 |
generator_type: vits_generator
|
| 17 |
generator_params:
|
| 18 |
hidden_channels: 192
|
| 19 |
-
spks:
|
| 20 |
-
global_channels:
|
| 21 |
segment_size: 32
|
| 22 |
text_encoder_attention_heads: 2
|
| 23 |
text_encoder_ffn_expand: 4
|
|
@@ -159,16 +159,18 @@ generator_first: false # whether to start updating generator first
|
|
| 159 |
# OTHER TRAINING SETTING #
|
| 160 |
##########################################################
|
| 161 |
#num_iters_per_epoch: 1000 # number of iterations per epoch
|
| 162 |
-
max_epoch:
|
| 163 |
accum_grad: 1 # gradient accumulation
|
| 164 |
-
batch_bins:
|
| 165 |
batch_type: numel # how to make batch
|
|
|
|
| 166 |
grad_clip: -1 # gradient clipping norm
|
| 167 |
grad_noise: false # whether to use gradient noise injection
|
| 168 |
sort_in_batch: descending # how to sort data in making batch
|
| 169 |
sort_batch: descending # how to sort created batches
|
| 170 |
-
num_workers:
|
| 171 |
use_amp: false # whether to use pytorch amp
|
|
|
|
| 172 |
log_interval: 50 # log interval in iterations
|
| 173 |
keep_nbest_models: 10 # number of models to keep
|
| 174 |
num_att_plot: 3 # number of attention figures to be saved in every check
|
|
|
|
| 16 |
generator_type: vits_generator
|
| 17 |
generator_params:
|
| 18 |
hidden_channels: 192
|
| 19 |
+
spks: 128
|
| 20 |
+
global_channels: 256
|
| 21 |
segment_size: 32
|
| 22 |
text_encoder_attention_heads: 2
|
| 23 |
text_encoder_ffn_expand: 4
|
|
|
|
| 159 |
# OTHER TRAINING SETTING #
|
| 160 |
##########################################################
|
| 161 |
#num_iters_per_epoch: 1000 # number of iterations per epoch
|
| 162 |
+
max_epoch: 30 # number of epochs
|
| 163 |
accum_grad: 1 # gradient accumulation
|
| 164 |
+
batch_bins: 1900000 # batch bins (feats_type=raw)
|
| 165 |
batch_type: numel # how to make batch
|
| 166 |
+
#batch_type: sorted # how to make batch; batch_size: 1
|
| 167 |
grad_clip: -1 # gradient clipping norm
|
| 168 |
grad_noise: false # whether to use gradient noise injection
|
| 169 |
sort_in_batch: descending # how to sort data in making batch
|
| 170 |
sort_batch: descending # how to sort created batches
|
| 171 |
+
num_workers: 1 # number of workers of data loader
|
| 172 |
use_amp: false # whether to use pytorch amp
|
| 173 |
+
train_dtype: float32
|
| 174 |
log_interval: 50 # log interval in iterations
|
| 175 |
keep_nbest_models: 10 # number of models to keep
|
| 176 |
num_att_plot: 3 # number of attention figures to be saved in every check
|