mrfakename committed on
Commit
079b5bc
·
verified ·
1 Parent(s): ab6232e

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.yaml +232 -0
  2. decoder.ckpt +3 -0
config.yaml ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ acous_params:
2
+ - - 480
3
+ - 1200
4
+ - 80
5
+ - - 240
6
+ - 1200
7
+ - 160
8
+ amp: false
9
+ audio_num_mel_bins: 160
10
+ audio_sample_rate: 24000
11
+ c_spk_enc: 512
12
+ char_dict_size: 15000
13
+ conv_use_pos: false
14
+ dec0_dilations:
15
+ - 1
16
+ - 2
17
+ - 4
18
+ - 1
19
+ - 2
20
+ - 4
21
+ - 1
22
+ dec0_kernel_size: 3
23
+ dec_dilations:
24
+ - 1
25
+ - 2
26
+ - 1
27
+ - 2
28
+ - 1
29
+ dec_ffn_kernel_size: 9
30
+ dec_inp_add_noise: false
31
+ dec_kernel_size: 5
32
+ dec_layers: 4
33
+ dec_post_net_kernel: 3
34
+ decoder_rnn_dim: 0
35
+ decoder_type: conv
36
+ dropout: 0.0
37
+ ds_add_pitch_embed: false
38
+ dur_alpha: 1.0
39
+ dur_context_enc: true
40
+ dur_log: true
41
+ dur_predictor_kernel: 3
42
+ dur_predictor_layers: 2
43
+ dur_use_char: true
44
+ dur_use_spk: true
45
+ enc_dec_norm: ln
46
+ enc_dilations:
47
+ - 1
48
+ - 1
49
+ - 1
50
+ - 1
51
+ enc_ffn_kernel_size: 5
52
+ enc_kernel_size: 5
53
+ enc_layers: 8
54
+ enc_post_net_kernel: 3
55
+ enc_pre_ln: true
56
+ enc_prenet: true
57
+ encoder_K: 8
58
+ encoder_type: rel_fft
59
+ f0_max: 600
60
+ f0_min: 60
61
+ ffn_act: gelu
62
+ ffn_hidden_size: 1024
63
+ fft_size: 1200
64
+ fg_spk_enc_hidden: 256
65
+ fmax: 12000
66
+ fmin: 0
67
+ frames_multiple: 8
68
+ hidden_size: 512
69
+ hop_size: 240
70
+ ignore_begin_end_sil: false
71
+ keep_c0_init: true
72
+ kl_min: 0
73
+ kl_start_steps: 1
74
+ lat_for_dur: false
75
+ latent_dim: 16
76
+ latent_size: 256
77
+ layers_in_block: 2
78
+ ling_label_dict_size:
79
+ - 20
80
+ - 4
81
+ - 5
82
+ - 2
83
+ - 3
84
+ - 3
85
+ - 3
86
+ - 6
87
+ - 15
88
+ ling_labels:
89
+ - tone
90
+ load_ckpt: ''
91
+ loud_norm: false
92
+ mel_vmax: 0.5
93
+ mel_vmin: -6
94
+ min_frames: 50
95
+ mixed_precision: bf16
96
+ no_text_enc: false
97
+ nsf_type: none
98
+ num_heads: 2
99
+ out_wav_norm: true
100
+ pad_frames: false
101
+
102
+ precision: fp16
103
+ predict_pitch: false
104
+ resblock: '1'
105
+ resblock_dilation_sizes:
106
+ - - 1
107
+ - 3
108
+ - 5
109
+ - - 1
110
+ - 3
111
+ - 5
112
+ - - 1
113
+ - 3
114
+ - 5
115
+ resblock_kernel_sizes:
116
+ - 3
117
+ - 7
118
+ - 11
119
+ train_spk_embed_only: false
120
+ upsample_initial_channel: 512
121
+ upsample_kernel_sizes:
122
+ - 12
123
+ - 11
124
+ - 8
125
+ - 4
126
+ upsample_rates:
127
+ - 6
128
+ - 5
129
+ - 4
130
+ - 2
131
+ use_bert_input: false
132
+ use_cfg: true
133
+ use_char: true
134
+ use_cur_global: false
135
+ use_cur_global_dec: true
136
+ use_dur_embed: true
137
+ use_dur_mask_embed: true
138
+ use_ema: false
139
+ use_expand_ph: true
140
+ use_finegrained_spk: false
141
+ use_global_lat: false
142
+ use_gt_dur: false
143
+ use_gt_f0: false
144
+ use_mix_spk_embed: false
145
+ use_new_vae: false
146
+ use_ph_level_f0: false
147
+ use_ph_pos_embed: true
148
+ use_pitch_embed: false
149
+ use_pitch_embed_dec: false
150
+ use_pitch_pred: true
151
+ use_pos_embed: true
152
+ use_qk_norm: true
153
+ use_random_spk_embed: false
154
+ use_seq_cfg: true
155
+ use_spk_embed: false
156
+ use_spk_enc: true
157
+ use_spk_id: false
158
+ use_uv: true
159
+ use_vae: true
160
+ use_vpcfm: true
161
+ use_vqvae: true
162
+ use_word_encoder: true
163
+ use_word_input: false
164
+ vae_dur_grad: 0.1
165
+ vae_enc_hidden_size: 384
166
+ vae_stride: 4
167
+ vae_word_conder_layers: 0
168
+ vq_stride: 8
169
+ win_size: 1200
170
+ word_dict_size: 10000
171
+ melgan_config:
172
+ all_noise: false
173
+ backbone_resampling: librosa_kaiser_best
174
+ batch_size: 8
175
+ cond_disc: false
176
+ dim_pitch_condition: 1
177
+ downsamp_factor: 4
178
+ epochs: 1000
179
+ frame_shift: 240
180
+ lambda_feat: 0.0
181
+ lambda_log_pitch: 0.4
182
+ lambda_voiced: 1.0
183
+ load_D: 1
184
+ log_interval: 100
185
+ loss_pitch: 1.0
186
+ loss_speaker: 1.0
187
+ loss_stft: 0.0
188
+ lr: 0.0005
189
+ mode_pitch_condition: singgan_torch
190
+ multi_resolution: 0
191
+ n_layers_D: 4
192
+ n_mel_channels: 160
193
+ n_residual_layers: 4
194
+ n_test_samples: 5
195
+ ndf: 16
196
+ noise_index: 1.0
197
+ nr: 0
198
+ num_D: 3
199
+ num_band: 1
200
+ num_workers: 0
201
+ offset: 0
202
+ pretrain_steps: 0
203
+ res_layers: 1
204
+ run_hdfs: 0
205
+ sampling_rate: 24000
206
+ save_interval: 5000
207
+ seq_len: 100
208
+ single_stft: 0
209
+ sub_dis: 1
210
+ tf: 1
211
+ tf_end_ratio: 0.0
212
+ tf_end_step: 0
213
+ tf_start_ratio: 0.0
214
+ tf_start_step: 0
215
+ up_sample:
216
+ - 5
217
+ - 4
218
+ - 4
219
+ - 3
220
+ use_F_dis: 0
221
+ use_aug_pitch: 0
222
+ use_interpolate: 0
223
+ use_lsgan: 1
224
+ use_mel_loss: 1
225
+ use_melnorm: 0
226
+ use_msg_gan: 0
227
+ use_pitch_condition: false
228
+ use_pitch_prediction: 1
229
+ use_sbd: 0
230
+ use_speaker_prediction: 0
231
+ use_tanh: true
232
+ use_time_loss: 1
decoder.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f309a09fa0c629551d86b0bc68f5d0c4a24448c5fea6bbed421fb3772977a363
3
+ size 904541298