# diffusion_transformer/config.yaml
# Commit 284a6e8 (verified) by mrfakename: "Upload folder using huggingface_hub"
# Two rows of three integers each. Every row is written inline so the
# row boundaries cannot be lost to indentation changes; written flat, this
# would load as [[480], 1200, 80, [240], 1200, 160] instead of two triples.
# NOTE(review): the second row equals hop_size (240), win_size (1200) and
# audio_num_mel_bins (160) from this file, so each row is presumably
# (hop, window, mel bins) - confirm against the code that reads it.
acous_params:
- [480, 1200, 80]
- [240, 1200, 160]
# General switches, audio front-end sizes and the parent config reference.
amp: true
audio_num_mel_bins: 160
audio_sample_rate: 24000
# Single-entry list holding a relative path.
# NOTE(review): presumably the parent config this file overrides - confirm
# how the loader resolves and merges it.
base_config: ['./base_config.yaml']
c_spk_enc: 512
char_dict_size: 15000
conv_use_pos: false
# Decoder settings.
# dec0_dilations carries 7 integers and dec_dilations carries 5.
# NOTE(review): presumably one dilation factor per convolution layer -
# confirm against the model code.
dec0_dilations: [1, 2, 4, 1, 2, 4, 1]
dec0_kernel_size: 3
dec_dilations: [1, 2, 1, 2, 1]
dec_ffn_kernel_size: 9
dec_inp_add_noise: false
dec_kernel_size: 5
dec_layers: 4
dec_post_net_kernel: 3
decoder_rnn_dim: 0
decoder_type: conv
# Dropout plus the dur_* settings.
# NOTE(review): the dur_* keys presumably configure a duration predictor -
# confirm against the model code.
# dropout is written as a float (0.0), not an integer.
dropout: 0.0
ds_add_pitch_embed: false
dur_alpha: 1.0
dur_context_enc: true
dur_log: true
dur_predictor_kernel: 3
dur_predictor_layers: 2
dur_use_char: true
dur_use_spk: true
# Encoder settings.
enc_dec_norm: ln
# Four entries, all 1.
# NOTE(review): enc_layers is 8 while this list has 4 values - confirm how
# the consumer maps dilations onto layers.
enc_dilations: [1, 1, 1, 1]
enc_ffn_kernel_size: 5
enc_kernel_size: 5
enc_layers: 8
enc_post_net_kernel: 3
enc_pre_ln: true
enc_prenet: true
encoder_K: 8
encoder_type: rel_fft
# Pitch range, spectral analysis sizes and latent/hidden dimensions.
# NOTE(review): units are not stated in this file; f0_*/fmin/fmax are
# presumably Hz and fft_size/hop_size presumably samples - confirm.
f0_max: 600
f0_min: 60
ffn_act: gelu
ffn_hidden_size: 1024
# Same value as win_size (1200) elsewhere in this file.
fft_size: 1200
fg_spk_enc_hidden: 256
# Half of audio_sample_rate (24000) set elsewhere in this file.
fmax: 12000
fmin: 0
frames_multiple: 8
# Explicit empty string (not null).
gen_dir_name: ''
hidden_size: 512
hop_size: 240
ignore_begin_end_sil: false
keep_c0_init: true
kl_min: 0
kl_start_steps: 1
latent_dim: 32
latent_size: 256
layers_in_block: 2
# Linguistic-label settings.
# NOTE(review): ling_label_dict_size has 9 entries but ling_labels names
# only one label - confirm how the consumer pairs sizes with labels.
ling_label_dict_size: [20, 4, 5, 2, 3, 3, 3, 6, 15]
ling_labels: [tone]
# Normalisation, input limits, precision and seeding.
loud_norm: false
max_input_tokens: 1550
# Mel value bounds: upper 0.5, lower -6.
# NOTE(review): scale (e.g. log-mel) is not stated here - confirm.
mel_vmax: 0.5
mel_vmin: -6
min_frames: 50
mix_melout_timbre: true
mix_ph_timbre: false
# mixed_precision and precision both hold the string bf16.
# NOTE(review): confirm which of the two keys the consumer actually reads.
mixed_precision: bf16
no_text_enc: false
num_heads: 2
out_wav_norm: true
pad_frames: false
precision: bf16
seed: 1234
# Feature toggles; every value in this run is a boolean.
# NOTE(review): what each flag enables is decided by the consuming code and
# is not visible in this file - check there before flipping any of them.
use_bert_input: false
use_cfg: true
use_char: true
use_cur_global: false
use_cur_global_dec: true
use_dit_1b: false
use_dur_embed: true
use_dur_mask_embed: true
use_ema: false
use_expand_ph: true
use_finegrained_spk: false
use_global_lat: false
use_gt_dur: false
use_gt_f0: false
use_mix_spk_embed: false
use_new_vae: false
use_ph_level_f0: false
use_ph_pos_embed: true
use_pitch_embed: false
use_pitch_embed_dec: false
use_pitch_pred: true
use_pos_embed: true
use_qk_norm: true
use_random_spk_embed: false
use_seq_cfg: true
use_spk_embed: false
use_spk_enc: true
use_spk_id: false
use_uv: true
# use_vae and use_vqvae are both true.
use_vae: true
use_vpcfm: true
use_vqvae: true
use_word_encoder: true
use_word_input: false
# VAE / VQ settings, analysis window size and word vocabulary size.
vae_dur_grad: 0.1
vae_enc_hidden_size: 384
vae_stride: 4
# NOTE(review): "conder" looks like a typo for "encoder", but the key name
# is presumably what the consumer looks up - do not rename without checking.
vae_word_conder_layers: 0
vq_stride: 8
vqvae_start_steps: 0
# Same value as fft_size (1200) elsewhere in this file.
win_size: 1200
word_dict_size: 10000