ASLP-lab commited on
Commit
e66539c
·
verified ·
1 Parent(s): bc373f3

Upload 3 files

Browse files
sensevoice_small_yue/config.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ encoder: SenseVoiceEncoderSmall
2
+ encoder_conf:
3
+ output_size: 512
4
+ attention_heads: 4
5
+ linear_units: 2048
6
+ num_blocks: 50
7
+ tp_blocks: 20
8
+ dropout_rate: 0.1
9
+ positional_dropout_rate: 0.1
10
+ attention_dropout_rate: 0.1
11
+ input_layer: pe
12
+ pos_enc_class: SinusoidalPositionEncoder
13
+ normalize_before: true
14
+ kernel_size: 11
15
+ sanm_shfit: 0
16
+ selfattention_layer_type: sanm
17
+ model: SenseVoiceSmall
18
+ model_conf:
19
+ length_normalized_loss: true
20
+ sos: 1
21
+ eos: 2
22
+ ignore_id: -1
23
+ tokenizer: SentencepiecesTokenizer
24
+ tokenizer_conf:
25
+ bpemodel: /home/work_nfs17/asr_data/ckpt/SenseVoiceSmall/chn_jpn_yue_eng_ko_spectok.bpe.model
26
+ unk_symbol: <unk>
27
+ split_with_space: true
28
+ frontend: WavFrontend
29
+ frontend_conf:
30
+ fs: 16000
31
+ window: hamming
32
+ n_mels: 80
33
+ frame_length: 25
34
+ frame_shift: 10
35
+ lfr_m: 7
36
+ lfr_n: 6
37
+ cmvn_file: /home/work_nfs17/asr_data/ckpt/SenseVoiceSmall/am.mvn
38
+ dataset: SenseVoiceCTCDataset
39
+ dataset_conf:
40
+ index_ds: IndexDSJsonl
41
+ batch_sampler: BatchSampler
42
+ data_split_num: 1
43
+ batch_type: token
44
+ batch_size: 18000
45
+ max_token_length: 2000
46
+ min_token_length: 60
47
+ max_source_length: 2000
48
+ min_source_length: 60
49
+ max_target_length: 200
50
+ min_target_length: 0
51
+ shuffle: true
52
+ num_workers: 4
53
+ sos: 1
54
+ eos: 2
55
+ IndexDSJsonl: IndexDSJsonl
56
+ retry: 20
57
+ sort_size: 1024
58
+ train_conf:
59
+ accum_grad: 1
60
+ grad_clip: 5
61
+ max_epoch: 10
62
+ keep_nbest_models: 200
63
+ avg_nbest_model: 10
64
+ log_interval: 300
65
+ resume: true
66
+ validate_interval: 5000
67
+ save_checkpoint_interval: 5000
68
+ use_deepspeed: false
69
+ deepspeed_config: /home/work_nfs17/sywang/code/SenseVoice/deepspeed_conf/ds_stage1.json
70
+ optim: adamw
71
+ optim_conf:
72
+ lr: 0.0001
73
+ scheduler: warmuplr
74
+ scheduler_conf:
75
+ warmup_steps: 25000
76
+ specaug: SpecAugLFR
77
+ specaug_conf:
78
+ apply_time_warp: false
79
+ time_warp_window: 5
80
+ time_warp_mode: bicubic
81
+ apply_freq_mask: true
82
+ freq_mask_width_range:
83
+ - 0
84
+ - 30
85
+ lfr_rate: 6
86
+ num_freq_mask: 1
87
+ apply_time_mask: true
88
+ time_mask_width_range:
89
+ - 0
90
+ - 12
91
+ num_time_mask: 1
92
+ init_param: /home/work_nfs17/asr_data/ckpt/SenseVoiceSmall/model.pt
93
+ config: /home/work_nfs17/asr_data/ckpt/SenseVoiceSmall/config.yaml
94
+ is_training: true
95
+ trust_remote_code: false
96
+ train_data_set_list: /home/work_nfs17/sywang/code/SenseVoice/data/open/stage2.jsonl
97
+ valid_data_set_list: /home/work_nfs17/sywang/code/SenseVoice/data/val.jsonl
98
+ output_dir: ./outputs5
99
+ model_path: /home/work_nfs17/asr_data/ckpt/SenseVoiceSmall
100
+ device: cpu
sensevoice_small_yue/configuration.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "framework": "pytorch",
3
+ "task" : "auto-speech-recognition",
4
+ "model": {"type" : "funasr"},
5
+ "pipeline": {"type":"funasr-pipeline"},
6
+ "model_name_in_hub": {
7
+ "ms":"",
8
+ "hf":""},
9
+ "file_path_metas": {
10
+ "init_param":"model.pt",
11
+ "config":"config.yaml",
12
+ "tokenizer_conf": {"bpemodel": "chn_jpn_yue_eng_ko_spectok.bpe.model"},
13
+ "frontend_conf":{"cmvn_file": "am.mvn"}}
14
+ }
sensevoice_small_yue/model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38940ad5ae8a580efef0069f6a9c38ac67656dd8f046af9bd8d162cea3448706
3
+ size 2809073798