mooncast
/

audio_detokenizer

Model card Files and versions

xet

Community

mrfakename committed on Apr 4

Commit

c943555

verified ·

1 Parent(s): c9440f9

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

config.yaml +119 -0
model.pt +3 -0

config.yaml ADDED Viewed

	@@ -0,0 +1,119 @@

+accumulate_grad_batches: 1
+base_config: ''
+batch_max_tokens: 4000
+batch_size: 5
+cfg_init: 1.0
+cfg_scale: 4.0
+cfg_schedule: linear
+check_val_every_n_epoch: 10
+clip_grad_norm: 0.5
+data_dir: ''
+datamodule_target: ''
+debug: false
+deep_speed_strategy_stage: 2
+drop_last: true
+endless_ds: false
+exp_name: ''
+filter_args:
+  lang:
+  - zh
+  - en
+  max_spk_num: 6
+  speech_ratio: 0.6
+gradient_clip_val: 1.0
+indexed_ds: true
+infer: false
+infer_exp_name: ''
+infer_json_path: ''
+inference_ckpt: ''
+inference_mode: nonstreaming
+initialize_from: ''
+kimia_data_state_path: datastates/zeqian_ft.datastate
+learning_rate: 1e-4
+limit_val_batches: 100
+load_opt: false
+log_interval: 10
+logger_type: tensorboard
+loss:
+  mel_loss: l1
+max_epochs: 1000
+max_eval_sentences: -1
+max_eval_tokens: -1
+max_prompt_ratio: 0.5
+max_segment_cnt: 20000
+max_sentences: -1
+max_speech_duration: 20
+max_tokens: 31250
+max_training_steps: 200000
+max_updates: 160000
+mel_mean: -4.479605
+mel_std: 3.4584913
+meta_dir: null
+min_prompt_duration: 0.1
+min_speech_duration: -1
+model:
+  dit:
+    chunk_params:
+      hz: 50
+      max_chunk: 3.0
+      max_chunk_history: 500000
+      min_chunk: 0.5
+      need_block_shift: true
+    depth: 10
+    ffn_act_layer: gleu_tanh
+    ffn_conv_kernel_size: 5
+    ffn_gated_glu: false
+    ffn_type: vanilla_mlp
+    hidden_size: 2048
+    input_size: 80
+    max_seq_len: 4096
+    mlp_ratio: 4.0
+    num_heads: 16
+    position_embedding_type: skip
+    prompt_cfg_dropout: 0.2
+    rope_params:
+      max_position_embeddings: 4096
+      rope_base: 10000.0
+      rope_interpolation_factor: 1.0
+    semantic_cfg_dropout: 0.15
+    semantic_vocab_size: 8192
+    use_chunk_setting: true
+    use_rope: true
+  position_id_start_from: 0
+  random_position_start: true
+  restart_position_ids: false
+  upsample_args:
+    rate: 1.0
+need_merge_same_speaker: true
+no_verlap: true
+normalize_mel: true
+num_nodes: 4
+num_sanity_val_steps: 0
+num_workers: 3
+ode_steps: 150
+optimizer_adam_beta1: 0.9
+optimizer_adam_beta2: 0.98
+optimizer_class: adamw
+pin_memory: true
+precision: bf16-mixed
+save_topk: 10
+seed: 1234
+shuffle: true
+sort_by_len: true
+src_sample_rate: 16000
+strategy: ddp
+tensorboard_dir: ''
+test_num: 100
+tgt_sample_rate: 24000
+timescale: 240000
+use_cfg: false
+use_cfg_rescale: false
+use_chunk_setting: true
+use_distributed_sampler: false
+val_check_interval: 2000
+vocoder_ckpt: ''
+vocoder_config_path: ''
+wandb_name: ''
+warmup_updates: 2000
+weight_decay: 0.0001
+work_dir: ''

model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2b8b960d4bffeb18cf139a5997e6f07146a678a678d630f935c71062a87d61e
+size 9426224086