patriotyk
/

styletts2_ukrainian_single

Model card Files Files and versions Community

styletts2_ukrainian_single / config.yml

patriotyk's picture

Replace wrong model

0123370 verified 7 days ago

history blame contribute delete

1.23 kB


	multispeaker: false

	dim_in: 64
	hidden_dim: 512
	max_conv_dim: 512
	n_layer: 3
	n_mels: 80

	n_token: 181 # number of phoneme tokens
	max_dur: 50 # maximum duration of a single phoneme
	style_dim: 128 # style vector size

	dropout: 0.2

	# config for decoder
	decoder:
	type: 'istftnet' # either hifigan or istftnet
	resblock_kernel_sizes: [3,7,11]
	upsample_rates : [10, 6]
	upsample_initial_channel: 512
	resblock_dilation_sizes: [[1,3,5], [1,3,5], [1,3,5]]
	upsample_kernel_sizes: [20, 12]
	gen_istft_n_fft: 20
	gen_istft_hop_size: 5

	# speech language model config
	slm:
	model: 'openai/whisper-medium'
	sr: 16000 # sampling rate of SLM
	hidden: 768 # hidden size of SLM
	nlayers: 13 # number of layers of SLM
	initial_channel: 64 # initial channels of SLM discriminator head

	# style diffusion model config
	diffusion:
	embedding_mask_proba: 0.1
	# transformer config
	transformer:
	num_layers: 3
	num_heads: 8
	head_features: 64
	multiplier: 2

	# diffusion distribution config
	dist:
	sigma_data: 0.18 # placeholder for estimate_sigma_data set to false
	estimate_sigma_data: true # estimate sigma_data from the current batch if set to true
	mean: -3.0
	std: 1.0