Upload folder using huggingface_hub
Browse files- config.yaml +119 -0
- model.pt +3 -0
config.yaml
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
accumulate_grad_batches: 1
|
2 |
+
base_config: ''
|
3 |
+
batch_max_tokens: 4000
|
4 |
+
batch_size: 5
|
5 |
+
cfg_init: 1.0
|
6 |
+
cfg_scale: 4.0
|
7 |
+
cfg_schedule: linear
|
8 |
+
check_val_every_n_epoch: 10
|
9 |
+
clip_grad_norm: 0.5
|
10 |
+
data_dir: ''
|
11 |
+
datamodule_target: ''
|
12 |
+
debug: false
|
13 |
+
deep_speed_strategy_stage: 2
|
14 |
+
drop_last: true
|
15 |
+
endless_ds: false
|
16 |
+
exp_name: ''
|
17 |
+
filter_args:
|
18 |
+
lang:
|
19 |
+
- zh
|
20 |
+
- en
|
21 |
+
max_spk_num: 6
|
22 |
+
speech_ratio: 0.6
|
23 |
+
gradient_clip_val: 1.0
|
24 |
+
indexed_ds: true
|
25 |
+
infer: false
|
26 |
+
infer_exp_name: ''
|
27 |
+
infer_json_path: ''
|
28 |
+
inference_ckpt: ''
|
29 |
+
inference_mode: nonstreaming
|
30 |
+
initialize_from: ''
|
31 |
+
kimia_data_state_path: datastates/zeqian_ft.datastate
|
32 |
+
learning_rate: 1e-4
|
33 |
+
limit_val_batches: 100
|
34 |
+
load_opt: false
|
35 |
+
log_interval: 10
|
36 |
+
logger_type: tensorboard
|
37 |
+
loss:
|
38 |
+
mel_loss: l1
|
39 |
+
max_epochs: 1000
|
40 |
+
max_eval_sentences: -1
|
41 |
+
max_eval_tokens: -1
|
42 |
+
max_prompt_ratio: 0.5
|
43 |
+
max_segment_cnt: 20000
|
44 |
+
max_sentences: -1
|
45 |
+
max_speech_duration: 20
|
46 |
+
max_tokens: 31250
|
47 |
+
max_training_steps: 200000
|
48 |
+
max_updates: 160000
|
49 |
+
mel_mean: -4.479605
|
50 |
+
mel_std: 3.4584913
|
51 |
+
meta_dir: null
|
52 |
+
min_prompt_duration: 0.1
|
53 |
+
min_speech_duration: -1
|
54 |
+
model:
|
55 |
+
dit:
|
56 |
+
chunk_params:
|
57 |
+
hz: 50
|
58 |
+
max_chunk: 3.0
|
59 |
+
max_chunk_history: 500000
|
60 |
+
min_chunk: 0.5
|
61 |
+
need_block_shift: true
|
62 |
+
depth: 10
|
63 |
+
ffn_act_layer: gleu_tanh
|
64 |
+
ffn_conv_kernel_size: 5
|
65 |
+
ffn_gated_glu: false
|
66 |
+
ffn_type: vanilla_mlp
|
67 |
+
hidden_size: 2048
|
68 |
+
input_size: 80
|
69 |
+
max_seq_len: 4096
|
70 |
+
mlp_ratio: 4.0
|
71 |
+
num_heads: 16
|
72 |
+
position_embedding_type: skip
|
73 |
+
prompt_cfg_dropout: 0.2
|
74 |
+
rope_params:
|
75 |
+
max_position_embeddings: 4096
|
76 |
+
rope_base: 10000.0
|
77 |
+
rope_interpolation_factor: 1.0
|
78 |
+
semantic_cfg_dropout: 0.15
|
79 |
+
semantic_vocab_size: 8192
|
80 |
+
use_chunk_setting: true
|
81 |
+
use_rope: true
|
82 |
+
position_id_start_from: 0
|
83 |
+
random_position_start: true
|
84 |
+
restart_position_ids: false
|
85 |
+
upsample_args:
|
86 |
+
rate: 1.0
|
87 |
+
need_merge_same_speaker: true
|
88 |
+
no_verlap: true
|
89 |
+
normalize_mel: true
|
90 |
+
num_nodes: 4
|
91 |
+
num_sanity_val_steps: 0
|
92 |
+
num_workers: 3
|
93 |
+
ode_steps: 150
|
94 |
+
optimizer_adam_beta1: 0.9
|
95 |
+
optimizer_adam_beta2: 0.98
|
96 |
+
optimizer_class: adamw
|
97 |
+
pin_memory: true
|
98 |
+
precision: bf16-mixed
|
99 |
+
save_topk: 10
|
100 |
+
seed: 1234
|
101 |
+
shuffle: true
|
102 |
+
sort_by_len: true
|
103 |
+
src_sample_rate: 16000
|
104 |
+
strategy: ddp
|
105 |
+
tensorboard_dir: ''
|
106 |
+
test_num: 100
|
107 |
+
tgt_sample_rate: 24000
|
108 |
+
timescale: 240000
|
109 |
+
use_cfg: false
|
110 |
+
use_cfg_rescale: false
|
111 |
+
use_chunk_setting: true
|
112 |
+
use_distributed_sampler: false
|
113 |
+
val_check_interval: 2000
|
114 |
+
vocoder_ckpt: ''
|
115 |
+
vocoder_config_path: ''
|
116 |
+
wandb_name: ''
|
117 |
+
warmup_updates: 2000
|
118 |
+
weight_decay: 0.0001
|
119 |
+
work_dir: ''
|
model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2b8b960d4bffeb18cf139a5997e6f07146a678a678d630f935c71062a87d61e
|
3 |
+
size 9426224086
|