Commit
·
5fbd908
1
Parent(s):
dfea5a7
Update to version 0.54
Browse files
- README.md +5 -1
- {am → am-onnx}/decoder.int8.onnx +2 -2
- {am → am-onnx}/decoder.onnx +2 -2
- {am → am-onnx}/encoder.int8.onnx +2 -2
- {am → am-onnx}/encoder.onnx +2 -2
- {am → am-onnx}/joiner.int8.onnx +2 -2
- {am → am-onnx}/joiner.onnx +2 -2
- am/epoch-32-avg-2.pt +3 -0
- decode.py +7 -5
- decode8.py +47 -0
README.md
CHANGED
@@ -21,13 +21,17 @@ model-index:
|
|
21 |
metrics:
|
22 |
- name: Test WER
|
23 |
type: wer
|
24 |
-
value:
|
25 |
---
|
26 |
|
27 |
Small Zipformer2 model trained with k2-fsa/icefall on Russian data streaming version
|
28 |
|
|
|
|
|
29 |
Links:
|
30 |
|
31 |
<https://alphacephei.com/vosk>
|
32 |
|
33 |
<https://github.com/k2-fsa/icefall>
|
|
|
|
|
|
21 |
metrics:
|
22 |
- name: Test WER
|
23 |
type: wer
|
24 |
+
value: 11.3
|
25 |
---
|
26 |
|
27 |
Small Zipformer2 model trained with k2-fsa/icefall on Russian data streaming version
|
28 |
|
29 |
+
Version 0.54
|
30 |
+
|
31 |
Links:
|
32 |
|
33 |
<https://alphacephei.com/vosk>
|
34 |
|
35 |
<https://github.com/k2-fsa/icefall>
|
36 |
+
|
37 |
+
<https://github.com/k2-fsa/sherpa-onnx>
|
{am → am-onnx}/decoder.int8.onnx
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b0df458692e1d090075c8249001136ef05240dd0d726a6b56552fd46c538b2d
|
3 |
+
size 1326289
|
{am → am-onnx}/decoder.onnx
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89b3088a9e20e1ef7f2e85ce1a3478afe6a9c4ac57369cabcc4beb8e95328ea0
|
3 |
+
size 2093080
|
{am → am-onnx}/encoder.int8.onnx
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0db705e94ec35d803b1df4f40cda23d064e1142977c80ab288430b109777a9d
|
3 |
+
size 26214060
|
{am → am-onnx}/encoder.onnx
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9c27453e618bc97cf8a10169f34c104bd478166522907fcd122a46a88c78c69
|
3 |
+
size 90994145
|
{am → am-onnx}/joiner.int8.onnx
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b55784b071ab7512eab4c7c44e4f5478284ef33c83562cc6a249b972515a31e5
|
3 |
+
size 259417
|
{am → am-onnx}/joiner.onnx
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dde0c7f3be0a16113a3e042c79a492c48667c07a8c1e9422ffe81c768aad4838
|
3 |
+
size 1026462
|
am/epoch-32-avg-2.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4ea2bee39c93481b3932e4cb4db326b0f91dc9dc0c87ef9bba54089b8f491ba
|
3 |
+
size 380913148
|
decode.py
CHANGED
@@ -20,19 +20,21 @@ def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
|
|
20 |
def main():
|
21 |
|
22 |
recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
|
23 |
-
encoder="am/encoder.onnx",
|
24 |
-
decoder="am/decoder.onnx",
|
25 |
-
joiner="am/joiner.onnx",
|
26 |
tokens="lang/tokens.txt",
|
27 |
num_threads=4,
|
28 |
sample_rate=16000,
|
29 |
-
|
|
|
|
|
30 |
|
31 |
samples, sample_rate = read_wave("test.wav")
|
32 |
|
33 |
s = recognizer.create_stream()
|
34 |
s.accept_waveform(sample_rate, waveform=samples)
|
35 |
-
tail_padding = np.zeros(int(sample_rate * 0.
|
36 |
s.accept_waveform(sample_rate, waveform=tail_padding)
|
37 |
s.input_finished()
|
38 |
|
|
|
20 |
def main():
|
21 |
|
22 |
recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
|
23 |
+
encoder="am-onnx/encoder.onnx",
|
24 |
+
decoder="am-onnx/decoder.onnx",
|
25 |
+
joiner="am-onnx/joiner.onnx",
|
26 |
tokens="lang/tokens.txt",
|
27 |
num_threads=4,
|
28 |
sample_rate=16000,
|
29 |
+
dither=3e-5,
|
30 |
+
decoding_method="modified_beam_search",
|
31 |
+
max_active_paths=10)
|
32 |
|
33 |
samples, sample_rate = read_wave("test.wav")
|
34 |
|
35 |
s = recognizer.create_stream()
|
36 |
s.accept_waveform(sample_rate, waveform=samples)
|
37 |
+
tail_padding = np.zeros(int(sample_rate * 0.6)).astype(np.float32)
|
38 |
s.accept_waveform(sample_rate, waveform=tail_padding)
|
39 |
s.input_finished()
|
40 |
|
decode8.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
import wave
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import Tuple
|
5 |
+
import sys
|
6 |
+
import numpy as np
|
7 |
+
import sherpa_onnx
|
8 |
+
|
9 |
+
def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
|
10 |
+
with wave.open(wave_filename) as f:
|
11 |
+
assert f.getnchannels() == 1, f.getnchannels()
|
12 |
+
assert f.getsampwidth() == 2, f.getsampwidth() # it is in bytes
|
13 |
+
num_samples = f.getnframes()
|
14 |
+
samples = f.readframes(num_samples)
|
15 |
+
samples_int16 = np.frombuffer(samples, dtype=np.int16)
|
16 |
+
samples_float32 = samples_int16.astype(np.float32)
|
17 |
+
samples_float32 = samples_float32 / 32768
|
18 |
+
return samples_float32, f.getframerate()
|
19 |
+
|
20 |
+
def main():
|
21 |
+
|
22 |
+
recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
|
23 |
+
encoder="am-onnx/encoder.int8.onnx",
|
24 |
+
decoder="am-onnx/decoder.int8.onnx",
|
25 |
+
joiner="am-onnx/joiner.int8.onnx",
|
26 |
+
tokens="lang/tokens.txt",
|
27 |
+
num_threads=4,
|
28 |
+
sample_rate=16000,
|
29 |
+
dither=3e-5,
|
30 |
+
decoding_method="modified_beam_search",
|
31 |
+
max_active_paths=10)
|
32 |
+
|
33 |
+
samples, sample_rate = read_wave("test.wav")
|
34 |
+
|
35 |
+
s = recognizer.create_stream()
|
36 |
+
s.accept_waveform(sample_rate, waveform=samples)
|
37 |
+
tail_padding = np.zeros(int(sample_rate * 0.6)).astype(np.float32)
|
38 |
+
s.accept_waveform(sample_rate, waveform=tail_padding)
|
39 |
+
s.input_finished()
|
40 |
+
|
41 |
+
while recognizer.is_ready(s):
|
42 |
+
recognizer.decode_stream(s)
|
43 |
+
print (recognizer.get_result(s))
|
44 |
+
|
45 |
+
if __name__ == "__main__":
|
46 |
+
main()
|
47 |
+
|