Spaces:
Sleeping
Sleeping
update
Browse files
toolbox/k2_sherpa/examples.py
CHANGED
|
@@ -11,4 +11,12 @@ examples = [
|
|
| 11 |
"Yes",
|
| 12 |
"./data/test_wavs/paraformer-zh/si_chuan_hua.wav",
|
| 13 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
]
|
|
|
|
| 11 |
"Yes",
|
| 12 |
"./data/test_wavs/paraformer-zh/si_chuan_hua.wav",
|
| 13 |
],
|
| 14 |
+
[
|
| 15 |
+
"English",
|
| 16 |
+
"csukuangfj/sherpa-onnx-whisper-tiny.en",
|
| 17 |
+
"greedy_search",
|
| 18 |
+
4,
|
| 19 |
+
"Yes",
|
| 20 |
+
"./data/test_wavs/librispeech/1089-134686-0001.wav",
|
| 21 |
+
],
|
| 22 |
]
|
toolbox/k2_sherpa/nn_models.py
CHANGED
|
@@ -150,6 +150,16 @@ model_map = {
|
|
| 150 |
},
|
| 151 |
],
|
| 152 |
"English": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
{
|
| 154 |
"repo_id": "csukuangfj/sherpa-onnx-paraformer-en-2024-03-09",
|
| 155 |
"nn_model_file": "model.int8.onnx",
|
|
@@ -307,6 +317,20 @@ def load_sherpa_offline_recognizer_from_transducer(encoder_model_file: str,
|
|
| 307 |
return recognizer
|
| 308 |
|
| 309 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 310 |
def load_recognizer(local_model_dir: Path,
|
| 311 |
decoding_method: str = "greedy_search",
|
| 312 |
num_active_paths: int = 4,
|
|
|
|
| 150 |
},
|
| 151 |
],
|
| 152 |
"English": [
|
| 153 |
+
{
|
| 154 |
+
"repo_id": "csukuangfj/sherpa-onnx-whisper-tiny.en",
|
| 155 |
+
"encoder_model_file": "tiny.en-encoder.int8.onnx",
|
| 156 |
+
"encoder_model_file_sub_folder": ".",
|
| 157 |
+
"decoder_model_file": "tiny.en-decoder.int8.onnx",
|
| 158 |
+
"decoder_model_file_sub_folder": ".",
|
| 159 |
+
"tokens_file": "tiny.en-tokens.txt",
|
| 160 |
+
"tokens_file_sub_folder": ".",
|
| 161 |
+
"loader": "load_sherpa_offline_recognizer_from_whisper",
|
| 162 |
+
},
|
| 163 |
{
|
| 164 |
"repo_id": "csukuangfj/sherpa-onnx-paraformer-en-2024-03-09",
|
| 165 |
"nn_model_file": "model.int8.onnx",
|
|
|
|
| 317 |
return recognizer
|
| 318 |
|
| 319 |
|
| 320 |
+
def load_sherpa_offline_recognizer_from_whisper(encoder_model_file: str,
                                                decoder_model_file: str,
                                                tokens_file: str,
                                                num_threads: int = 2,
                                                ):
    """
    Create a sherpa-onnx offline recognizer from a Whisper model.

    All arguments are forwarded unchanged to
    ``sherpa_onnx.OfflineRecognizer.from_whisper``.

    :param encoder_model_file: forwarded as ``encoder``
        (e.g. ``tiny.en-encoder.int8.onnx`` in ``model_map``).
    :param decoder_model_file: forwarded as ``decoder``
        (e.g. ``tiny.en-decoder.int8.onnx`` in ``model_map``).
    :param tokens_file: forwarded as ``tokens``
        (e.g. ``tiny.en-tokens.txt`` in ``model_map``).
    :param num_threads: forwarded as ``num_threads``; defaults to 2.
    :return: whatever ``OfflineRecognizer.from_whisper`` returns.
    """
    # Collect the keyword arguments first so the mapping from this loader's
    # parameter names to the sherpa-onnx names is visible in one place.
    whisper_kwargs = dict(
        encoder=encoder_model_file,
        decoder=decoder_model_file,
        tokens=tokens_file,
        num_threads=num_threads,
    )
    return sherpa_onnx.OfflineRecognizer.from_whisper(**whisper_kwargs)
|
| 332 |
+
|
| 333 |
+
|
| 334 |
def load_recognizer(local_model_dir: Path,
|
| 335 |
decoding_method: str = "greedy_search",
|
| 336 |
num_active_paths: int = 4,
|