lich99 committed on
Commit
eb9f083
1 Parent(s): a12472a

upload config

Browse files
Files changed (5) hide show
  1. config/decoder.yaml +12 -0
  2. config/dvae.yaml +14 -0
  3. config/gpt.yaml +20 -0
  4. config/path.yaml +11 -0
  5. config/vocos.yaml +24 -0
config/decoder.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ dim: 384
4
+
5
+ decoder_config:
6
+ idim: ${dim}
7
+ odim: ${dim}
8
+ hidden: 512
9
+ n_layer: 12
10
+ bn_dim: 128
11
+
12
+ vq_config: null
config/dvae.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ dim: 512
4
+ decoder_config:
5
+ idim: ${dim}
6
+ odim: ${dim}
7
+ n_layer: 12
8
+ bn_dim: 128
9
+
10
+ vq_config:
11
+ dim: 1024
12
+ levels: [5,5,5,5]
13
+ G: 2
14
+ R: 2
config/gpt.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ num_audio_tokens: 626
4
+ num_text_tokens: 21178
5
+
6
+ gpt_config:
7
+ hidden_size: 768
8
+ intermediate_size: 3072
9
+ num_attention_heads: 12
10
+ num_hidden_layers: 20
11
+ use_cache: False
12
+ max_position_embeddings: 4096
13
+ # attn_implementation: flash_attention_2
14
+
15
+ spk_emb_dim: 192
16
+ spk_KL: False
17
+ num_audio_tokens: 626
18
+ num_text_tokens: null
19
+ num_vq: 4
20
+
config/path.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ vocos_config_path: config/vocos.yaml
4
+ vocos_ckpt_path: asset/Vocos.pt
5
+ dvae_config_path: config/dvae.yaml
6
+ dvae_ckpt_path: asset/DVAE.pt
7
+ gpt_config_path: config/gpt.yaml
8
+ gpt_ckpt_path: asset/GPT.pt
9
+ decoder_config_path: config/decoder.yaml
10
+ decoder_ckpt_path: asset/Decoder.pt
11
+ tokenizer_path: asset/tokenizer.pt
config/vocos.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ feature_extractor:
2
+ class_path: vocos.feature_extractors.MelSpectrogramFeatures
3
+ init_args:
4
+ sample_rate: 24000
5
+ n_fft: 1024
6
+ hop_length: 256
7
+ n_mels: 100
8
+ padding: center
9
+
10
+ backbone:
11
+ class_path: vocos.models.VocosBackbone
12
+ init_args:
13
+ input_channels: 100
14
+ dim: 512
15
+ intermediate_dim: 1536
16
+ num_layers: 8
17
+
18
+ head:
19
+ class_path: vocos.heads.ISTFTHead
20
+ init_args:
21
+ dim: 512
22
+ n_fft: 1024
23
+ hop_length: 256
24
+ padding: center