wietsedv commited on
Commit
0b2f528
1 Parent(s): 79f6ef6

add model files

Browse files
README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - text-to-speech
6
+ language: gos
7
+ ---
8
+
9
+ # Tacotron2 Gronings
exp/tts_ljspeech_finetune_tacotron2.v5/config.yaml ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/finetune_tacotron2.v5.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_ljspeech_finetune_tacotron2.v5
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - attn_loss
39
+ - min
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 5
44
+ grad_clip: 1.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: null
53
+ use_tensorboard: true
54
+ use_wandb: true
55
+ wandb_project: null
56
+ wandb_id: null
57
+ pretrain_path: null
58
+ init_param:
59
+ - downloads/3b0a779f28d99232479e782d4d20292b/exp/tts_train_tacotron2_raw_phn_tacotron_g2p_en_no_space/199epoch.pth:::tts.enc.embed,normalize
60
+ freeze_param: []
61
+ num_iters_per_epoch: 500
62
+ batch_size: 20
63
+ valid_batch_size: null
64
+ batch_bins: 3750000
65
+ valid_batch_bins: null
66
+ train_shape_file:
67
+ - exp/tts_stats_raw_char/train/text_shape.char
68
+ - exp/tts_stats_raw_char/train/speech_shape
69
+ valid_shape_file:
70
+ - exp/tts_stats_raw_char/valid/text_shape.char
71
+ - exp/tts_stats_raw_char/valid/speech_shape
72
+ batch_type: numel
73
+ valid_batch_type: null
74
+ fold_length:
75
+ - 150
76
+ - 204800
77
+ sort_in_batch: descending
78
+ sort_batch: descending
79
+ multiple_iterator: false
80
+ chunk_length: 500
81
+ chunk_shift_ratio: 0.5
82
+ num_cache_chunks: 1024
83
+ train_data_path_and_name_and_type:
84
+ - - dump/raw/train_nodev/text
85
+ - text
86
+ - text
87
+ - - dump/raw/train_nodev/wav.scp
88
+ - speech
89
+ - sound
90
+ valid_data_path_and_name_and_type:
91
+ - - dump/raw/train_dev/text
92
+ - text
93
+ - text
94
+ - - dump/raw/train_dev/wav.scp
95
+ - speech
96
+ - sound
97
+ allow_variable_data_keys: false
98
+ max_cache_size: 0.0
99
+ max_cache_fd: 32
100
+ valid_max_cache_size: null
101
+ optim: adam
102
+ optim_conf:
103
+ lr: 0.001
104
+ eps: 1.0e-06
105
+ weight_decay: 0.0
106
+ scheduler: null
107
+ scheduler_conf: {}
108
+ token_list:
109
+ - <blank>
110
+ - <unk>
111
+ - <space>
112
+ - e
113
+ - n
114
+ - a
115
+ - t
116
+ - i
117
+ - r
118
+ - o
119
+ - d
120
+ - l
121
+ - h
122
+ - s
123
+ - g
124
+ - k
125
+ - m
126
+ - j
127
+ - z
128
+ - v
129
+ - .
130
+ - w
131
+ - u
132
+ - b
133
+ - p
134
+ - c
135
+ - ''''
136
+ - f
137
+ - ','
138
+ - '?'
139
+ - y
140
+ - é
141
+ - ':'
142
+ - x
143
+ - '-'
144
+ - ë
145
+ - '!'
146
+ - q
147
+ - '"'
148
+ - á
149
+ - ï
150
+ - è
151
+ - ó
152
+ - í
153
+ - ;
154
+ - '&'
155
+ - )
156
+ - (
157
+ - ü
158
+ - ö
159
+ - ê
160
+ - ú
161
+ - /
162
+ - ä
163
+ - à
164
+ - <sos/eos>
165
+ odim: null
166
+ model_conf: {}
167
+ use_preprocessor: true
168
+ token_type: char
169
+ bpemodel: null
170
+ non_linguistic_symbols: null
171
+ cleaner: null
172
+ g2p: null
173
+ feats_extract: fbank
174
+ feats_extract_conf:
175
+ fs: 22050
176
+ fmin: 80
177
+ fmax: 7600
178
+ n_mels: 80
179
+ hop_length: 256
180
+ n_fft: 1024
181
+ win_length: null
182
+ normalize: global_mvn
183
+ normalize_conf:
184
+ stats_file: exp/tts_stats_raw_char/train/feats_stats.npz
185
+ tts: tacotron2
186
+ tts_conf:
187
+ embed_dim: 512
188
+ elayers: 1
189
+ eunits: 512
190
+ econv_layers: 3
191
+ econv_chans: 512
192
+ econv_filts: 5
193
+ atype: location
194
+ adim: 512
195
+ aconv_chans: 32
196
+ aconv_filts: 15
197
+ cumulate_att_w: true
198
+ dlayers: 2
199
+ dunits: 1024
200
+ prenet_layers: 2
201
+ prenet_units: 256
202
+ postnet_layers: 5
203
+ postnet_chans: 512
204
+ postnet_filts: 5
205
+ output_activation: null
206
+ use_batch_norm: true
207
+ use_concate: true
208
+ use_residual: false
209
+ dropout_rate: 0.5
210
+ zoneout_rate: 0.1
211
+ reduction_factor: 2
212
+ spk_embed_dim: null
213
+ use_masking: true
214
+ bce_pos_weight: 5.0
215
+ use_guided_attn_loss: true
216
+ guided_attn_loss_sigma: 0.4
217
+ guided_attn_loss_lambda: 1.0
218
+ pitch_extract: null
219
+ pitch_extract_conf: {}
220
+ pitch_normalize: null
221
+ pitch_normalize_conf: {}
222
+ energy_extract: null
223
+ energy_extract_conf: {}
224
+ energy_normalize: null
225
+ energy_normalize_conf: {}
226
+ required:
227
+ - output_dir
228
+ - token_list
229
+ version: 0.9.7
230
+ distributed: false
exp/tts_ljspeech_finetune_tacotron2.v5/images/attn_loss.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/images/backward_time.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/images/bce_loss.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/images/forward_time.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/images/iter_time.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/images/l1_loss.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/images/loss.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/images/lr_0.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/images/mse_loss.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/images/optim_step_time.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/images/train_time.png ADDED
exp/tts_ljspeech_finetune_tacotron2.v5/train.loss.ave_5best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b0e7cf1f3beae57ae5ee4429ab8e5bc002947bcad64d9c15dd4d7191d88bb6a
3
+ size 107312986
exp/tts_stats_raw_char/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
meta.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.9.7
2
+ files:
3
+ model_file: exp/tts_ljspeech_finetune_tacotron2.v5/train.loss.ave_5best.pth
4
+ python: "3.8.8 | packaged by conda-forge | (default, Feb 20 2021, 16:22:27) \n[GCC\
5
+ \ 9.3.0]"
6
+ timestamp: 1614414989.925269
7
+ torch: 1.7.1
8
+ yaml_files:
9
+ train_config: exp/tts_ljspeech_finetune_tacotron2.v5/config.yaml
tts_ljspeech_finetune_tacotron2.v5_train.loss.ave.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65c4b51686bfcc1754f40e8570d1a48574f1c97ce5105cb90e9ec9a0cbc6464a
3
+ size 107754064