patriotyk committed on
Commit
e0ee43b
·
verified ·
1 Parent(s): fbd4723

upload model

Browse files
Files changed (2) hide show
  1. config.yml +48 -0
  2. pytorch_model.bin +3 -0
config.yml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ multispeaker: true
3
+
4
+ dim_in: 64
5
+ hidden_dim: 512
6
+ max_conv_dim: 512
7
+ n_layer: 3
8
+ n_mels: 80
9
+
10
+ n_token: 181 # number of phoneme tokens
11
+ max_dur: 50 # maximum duration of a single phoneme
12
+ style_dim: 128 # style vector size
13
+
14
+ dropout: 0.2
15
+
16
+ # config for decoder
17
+ decoder:
18
+ type: 'hifigan' # either hifigan or istftnet
19
+ resblock_kernel_sizes: [3,7,11]
20
+ upsample_rates : [10,5,3,2]
21
+ upsample_initial_channel: 512
22
+ resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
23
+ upsample_kernel_sizes: [20,10,6,4]
24
+
25
+ # speech language model config
26
+ slm:
27
+ model: ''
28
+ sr: 16000 # sampling rate of SLM
29
+ hidden: 768 # hidden size of SLM
30
+ nlayers: 13 # number of layers of SLM
31
+ initial_channel: 64 # initial channels of SLM discriminator head
32
+
33
+ # style diffusion model config
34
+ diffusion:
35
+ embedding_mask_proba: 0.1
36
+ # transformer config
37
+ transformer:
38
+ num_layers: 3
39
+ num_heads: 8
40
+ head_features: 64
41
+ multiplier: 2
42
+
43
+ # diffusion distribution config
44
+ dist:
45
+ sigma_data: 0.19988229232390187 # fixed sigma_data value, used when estimate_sigma_data is set to false
46
+ estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
47
+ mean: -3.0
48
+ std: 1.0
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfd0e06ecc528d25c5d6371c0a5dbfa742b008d0f83d843738e411efeb03f851
3
+ size 766654558