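# fsmn-vad-onnx / vad.yaml
# NOTE(review): the items below look like repository-page metadata
# (presumably author / commit message / short commit hash) - confirm.
# 游雁 / create / d4c46ef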
# Front-end selection and its options.
# input_size is explicitly null here; presumably the consumer derives it
# elsewhere - TODO confirm against the loader.
input_size: null
frontend: wav_frontend
# Options for the front end named by `frontend`.
# NOTE(review): units are not stated in this file; presumably `fs` is in Hz
# and `frame_length` / `frame_shift` are in milliseconds - confirm.
frontend_conf:
  fs: 16000
  window: hamming
  n_mels: 80
  frame_length: 25
  frame_shift: 10
  dither: 0.0
  # NOTE(review): lfr_m / lfr_n look like frame stacking / subsampling
  # factors - verify against the front-end implementation.
  lfr_m: 5
  lfr_n: 1
# Model and encoder selection.
model: e2evad
encoder: fsmn
# Options for the encoder named by `encoder`.
encoder_conf:
  # NOTE(review): 400 equals n_mels (80) * lfr_m (5) from the front-end
  # options; presumably these must be kept in sync - confirm.
  input_dim: 400
  input_affine_dim: 140
  fsmn_layers: 4
  linear_dim: 250
  proj_dim: 128
  # NOTE(review): lorder/rorder and lstride/rstride presumably describe the
  # left/right context of the encoder's memory blocks; with rorder and
  # rstride at 0 there would be no right (look-ahead) context - confirm.
  lorder: 20
  rorder: 0
  lstride: 1
  rstride: 0
  output_affine_dim: 140
  output_dim: 248
# Post-processing options applied to the model output.
# NOTE(review): key names ending in `_time`, `_ms`, or `time_*` presumably
# carry millisecond values; units are not stated in this file - confirm.
vad_post_conf:
  # Same value as `fs` in the front-end options (16000).
  sample_rate: 16000
  detect_mode: 1
  snr_mode: 0
  max_end_silence_time: 800
  max_start_silence_time: 3000
  do_start_point_detection: true
  do_end_point_detection: true
  window_size_ms: 200
  sil_to_speech_time_thres: 150
  speech_to_sil_time_thres: 150
  speech_2_noise_ratio: 1.0
  # Kept as the integer 1 (not a boolean) exactly as in the original.
  do_extend: 1
  lookback_time_start_point: 200
  lookahead_time_end_point: 100
  max_single_segment_time: 60000
  snr_thres: -100.0
  noise_frame_num_used_for_snr: 100
  decibel_thres: -100.0
  speech_noise_thres: 0.6
  fe_prior_thres: 0.0001
  silence_pdf_num: 1
  sil_pdf_ids: [0]
  speech_noise_thresh_low: -0.1
  speech_noise_thresh_high: 0.3
  output_frame_probs: false
  # Same values as `frame_shift` (10) and `frame_length` (25) in the
  # front-end options; presumably they must agree - TODO confirm.
  frame_in_ms: 10
  frame_length_ms: 25