|
K_step: 1000 |
|
accumulate_grad_batches: 1 |
|
audio_num_mel_bins: 128 |
|
audio_sample_rate: 44100 |
|
binarization_args: |
|
shuffle: false |
|
with_align: true |
|
with_f0: true |
|
with_hubert: true |
|
with_spk_embed: false |
|
with_wav: false |
|
binarizer_cls: preprocessing.SVCpre.SVCBinarizer |
|
binary_data_dir: data/binary/mori |
|
check_val_every_n_epoch: 10 |
|
choose_test_manually: false |
|
clip_grad_norm: 1 |
|
config_path: training/config_nsf.yaml |
|
content_cond_steps: [] |
|
cwt_add_f0_loss: false |
|
cwt_hidden_size: 128 |
|
cwt_layers: 2 |
|
cwt_loss: l1 |
|
cwt_std_scale: 0.8 |
|
datasets: |
|
- opencpop |
|
debug: false |
|
dec_ffn_kernel_size: 9 |
|
dec_layers: 4 |
|
decay_steps: 20000 |
|
decoder_type: fft |
|
dict_dir: '' |
|
diff_decoder_type: wavenet |
|
diff_loss_type: l2 |
|
dilation_cycle_length: 4 |
|
dropout: 0.1 |
|
ds_workers: 4 |
|
dur_enc_hidden_stride_kernel: |
|
- 0,2,3 |
|
- 0,2,3 |
|
- 0,1,3 |
|
dur_loss: mse |
|
dur_predictor_kernel: 3 |
|
dur_predictor_layers: 5 |
|
enc_ffn_kernel_size: 9 |
|
enc_layers: 4 |
|
encoder_K: 8 |
|
encoder_type: fft |
|
endless_ds: false |
|
f0_bin: 256 |
|
f0_max: 1100.0 |
|
f0_min: 40.0 |
|
ffn_act: gelu |
|
ffn_padding: SAME |
|
fft_size: 2048 |
|
fmax: 16000 |
|
fmin: 40 |
|
fs2_ckpt: '' |
|
gaussian_start: true |
|
gen_dir_name: '' |
|
gen_tgt_spk_id: -1 |
|
hidden_size: 256 |
|
hop_size: 512 |
|
hubert_gpu: true |
|
hubert_path: ckpt/hubert/hubert_soft.pt |
|
infer: false |
|
keep_bins: 128 |
|
lambda_commit: 0.25 |
|
lambda_energy: 0.0 |
|
lambda_f0: 1.0 |
|
lambda_ph_dur: 0.3 |
|
lambda_sent_dur: 1.0 |
|
lambda_uv: 1.0 |
|
lambda_word_dur: 1.0 |
|
load_ckpt: '' |
|
log_interval: 100 |
|
loud_norm: false |
|
lr: 0.0004 |
|
max_beta: 0.02 |
|
max_epochs: 3000 |
|
max_eval_sentences: 1 |
|
max_eval_tokens: 60000 |
|
max_frames: 42000 |
|
max_input_tokens: 60000 |
|
max_sentences: 44 |
|
max_tokens: 128000 |
|
max_updates: 1000000 |
|
mel_loss: ssim:0.5|l1:0.5 |
|
mel_vmax: 1.5 |
|
mel_vmin: -6.0 |
|
min_level_db: -120 |
|
no_fs2: true |
|
norm_type: gn |
|
num_ckpt_keep: 10 |
|
num_heads: 2 |
|
num_sanity_val_steps: 1 |
|
num_spk: 1 |
|
num_test_samples: 0 |
|
num_valid_plots: 10 |
|
optimizer_adam_beta1: 0.9 |
|
optimizer_adam_beta2: 0.98 |
|
out_wav_norm: false |
|
pe_ckpt: ckpt/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt |
|
pe_enable: false |
|
perform_enhance: true |
|
pitch_ar: false |
|
pitch_enc_hidden_stride_kernel: |
|
- 0,2,5 |
|
- 0,2,5 |
|
- 0,2,5 |
|
pitch_extractor: parselmouth |
|
pitch_loss: l2 |
|
pitch_norm: log |
|
pitch_type: frame |
|
pndm_speedup: 10 |
|
pre_align_args: |
|
allow_no_txt: false |
|
denoise: false |
|
forced_align: mfa |
|
txt_processor: zh_g2pM |
|
use_sox: true |
|
use_tone: false |
|
pre_align_cls: data_gen.singing.pre_align.SingingPreAlign |
|
predictor_dropout: 0.5 |
|
predictor_grad: 0.1 |
|
predictor_hidden: -1 |
|
predictor_kernel: 5 |
|
predictor_layers: 5 |
|
prenet_dropout: 0.5 |
|
prenet_hidden_size: 256 |
|
pretrain_fs_ckpt: '' |
|
processed_data_dir: xxx |
|
profile_infer: false |
|
raw_data_dir: data/raw/mori |
|
ref_norm_layer: bn |
|
rel_pos: true |
|
reset_phone_dict: true |
|
residual_channels: 512 |
|
residual_layers: 20 |
|
save_best: false |
|
save_ckpt: true |
|
save_codes: |
|
- configs |
|
- modules |
|
- src |
|
- utils |
|
save_f0: true |
|
save_gt: false |
|
schedule_type: linear |
|
seed: 1234 |
|
sort_by_len: true |
|
speaker_id: mori |
|
spec_max: |
|
- -0.019330784678459167 |
|
- 0.06567800790071487 |
|
- 0.228793203830719 |
|
- 0.11802903562784195 |
|
- 0.10110537707805634 |
|
- 0.4689864218235016 |
|
- 0.7036661505699158 |
|
- 0.8703911900520325 |
|
- 0.9361032843589783 |
|
- 0.8655202388763428 |
|
- 0.7785714864730835 |
|
- 0.8148141503334045 |
|
- 0.679547131061554 |
|
- 0.7320918440818787 |
|
- 0.8978622555732727 |
|
- 0.784517765045166 |
|
- 0.7389000058174133 |
|
- 0.9378820657730103 |
|
- 0.924305260181427 |
|
- 0.9783570766448975 |
|
- 0.9135251045227051 |
|
- 1.0022618770599365 |
|
- 1.0694200992584229 |
|
- 1.1138495206832886 |
|
- 1.089599609375 |
|
- 1.2263946533203125 |
|
- 1.1603796482086182 |
|
- 1.1704267263412476 |
|
- 1.1352192163467407 |
|
- 0.9894201159477234 |
|
- 0.8867910504341125 |
|
- 1.1269314289093018 |
|
- 1.0692254304885864 |
|
- 1.0560897588729858 |
|
- 0.9916731119155884 |
|
- 0.8903139233589172 |
|
- 0.8455407619476318 |
|
- 0.8520137071609497 |
|
- 0.9605215787887573 |
|
- 0.816070556640625 |
|
- 0.9223976731300354 |
|
- 0.5765433311462402 |
|
- 0.7290225625038147 |
|
- 0.5933732986450195 |
|
- 0.5727764964103699 |
|
- 0.7514742016792297 |
|
- 0.4809681475162506 |
|
- 0.4641554355621338 |
|
- 0.5157797336578369 |
|
- 0.5645105838775635 |
|
- 0.6428343057632446 |
|
- 0.5405521392822266 |
|
- 0.473175972700119 |
|
- 0.44530436396598816 |
|
- 0.4342418909072876 |
|
- 0.44113707542419434 |
|
- 0.49833279848098755 |
|
- 0.48190838098526 |
|
- 0.44705429673194885 |
|
- 0.3638623356819153 |
|
- 0.30654290318489075 |
|
- 0.4154691696166992 |
|
- 0.34633904695510864 |
|
- 0.35942935943603516 |
|
- 0.39546000957489014 |
|
- 0.2380419671535492 |
|
- 0.328754723072052 |
|
- 0.3026845455169678 |
|
- 0.24524439871311188 |
|
- 0.25909164547920227 |
|
- 0.1833379566669464 |
|
- 0.14229395985603333 |
|
- 0.200522318482399 |
|
- 0.15264283120632172 |
|
- 0.1986929476261139 |
|
- 0.060403868556022644 |
|
- 0.0036014982033520937 |
|
- 0.12869416177272797 |
|
- 0.05505308136343956 |
|
- -0.031196756288409233 |
|
- -0.0574474036693573 |
|
- -0.0543854720890522 |
|
- -0.036599062383174896 |
|
- -0.06735989451408386 |
|
- -0.09748432785272598 |
|
- 0.00449156342074275 |
|
- 0.05749598145484924 |
|
- 0.08902159333229065 |
|
- -0.11218735575675964 |
|
- -0.011491273529827595 |
|
- -0.11833566427230835 |
|
- -0.010254009626805782 |
|
- -0.06703109294176102 |
|
- -0.09058596193790436 |
|
- -0.039965495467185974 |
|
- -0.10134681314229965 |
|
- 0.07939088344573975 |
|
- 0.13496151566505432 |
|
- -0.002308185212314129 |
|
- 0.18585674464702606 |
|
- -0.03961731120944023 |
|
- -0.1287146657705307 |
|
- -0.22040295600891113 |
|
- -0.23626138269901276 |
|
- -0.17821004986763 |
|
- -0.04868573695421219 |
|
- -0.11022017896175385 |
|
- -0.23770904541015625 |
|
- -0.1830531656742096 |
|
- -0.2001497745513916 |
|
- -0.3840283453464508 |
|
- -0.32347947359085083 |
|
- -0.12253516912460327 |
|
- -0.22939082980155945 |
|
- -0.43113377690315247 |
|
- -0.3798638880252838 |
|
- -0.23814789950847626 |
|
- -0.39345860481262207 |
|
- -0.4185469448566437 |
|
- -0.5147720575332642 |
|
- -0.5336534380912781 |
|
- -0.6215676665306091 |
|
- -0.6951723098754883 |
|
- -0.7364555597305298 |
|
- -0.7716941833496094 |
|
- -0.7262001633644104 |
|
- -0.8554714918136597 |
|
- -0.9651122093200684 |
|
spec_min: |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
- -4.999994277954102 |
|
spk_cond_steps: [] |
|
stop_token_weight: 5.0 |
|
task_cls: training.task.SVC_task.SVCTask |
|
test_ids: [] |
|
test_input_dir: '' |
|
test_num: 0 |
|
test_prefixes: |
|
- test |
|
test_set_name: test |
|
timesteps: 1000 |
|
train_set_name: train |
|
use_crepe: true |
|
use_denoise: false |
|
use_energy_embed: false |
|
use_gt_dur: false |
|
use_gt_f0: false |
|
use_midi: false |
|
use_nsf: true |
|
use_pitch_embed: true |
|
use_pos_embed: true |
|
use_spk_embed: false |
|
use_spk_id: false |
|
use_split_spk_id: false |
|
use_uv: false |
|
use_var_enc: false |
|
use_vec: false |
|
val_check_interval: 2000 |
|
valid_num: 0 |
|
valid_set_name: valid |
|
vocoder: network.vocoders.nsf_hifigan.NsfHifiGAN |
|
vocoder_ckpt: ckpt/nsf_hifigan/model |
|
warmup_updates: 2000 |
|
wav2spec_eps: 1e-6 |
|
weight_decay: 0 |
|
win_size: 2048 |
|
work_dir: ckpt/mori |
|
|