Commit
·
e18123e
1
Parent(s):
5fbd908
Add long chunk onnx
Browse files- am-onnx/decoder.chunk64.onnx +3 -0
- am-onnx/encoder.chunk64.onnx +3 -0
- am-onnx/joiner.chunk64.onnx +3 -0
- decode8.py → decode-8bit.py +0 -0
- decode-long-chunk.py +47 -0
am-onnx/decoder.chunk64.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3cca47e861640eed6b0693fd68fa25a48ed584ab053e0db8259fa26cbf85054e
|
3 |
+
size 2093080
|
am-onnx/encoder.chunk64.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5423647f6fc579c765c494ef4f6747c3cfc1847d08691cceac7b6b4210620982
|
3 |
+
size 90989508
|
am-onnx/joiner.chunk64.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df4cd0d4609a5877a0b72a44c439b5baefd1788249cb59327dc3cf476ef34219
|
3 |
+
size 1026462
|
decode8.py → decode-8bit.py
RENAMED
File without changes
|
decode-long-chunk.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
import wave
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import Tuple
|
5 |
+
import sys
|
6 |
+
import numpy as np
|
7 |
+
import sherpa_onnx
|
8 |
+
|
9 |
+
def read_wave(wave_filename: str) -> Tuple[np.ndarray, int]:
    """Load a mono, 16-bit PCM WAV file as normalized float32 samples.

    Args:
        wave_filename: Path of the WAV file to read.

    Returns:
        A tuple ``(samples, sample_rate)``. ``samples`` is a 1-D float32
        array obtained by dividing the int16 samples by 32768, and
        ``sample_rate`` is the frame rate stored in the file header.

    Raises:
        ValueError: If the file is not single-channel or its sample width
            is not 2 bytes.
    """
    with wave.open(wave_filename) as f:
        # Explicit raises instead of ``assert`` so the checks still run
        # when Python is started with -O.
        num_channels = f.getnchannels()
        if num_channels != 1:
            raise ValueError(
                f"expected a mono WAV file, got {num_channels} channels"
            )

        sample_width = f.getsampwidth()  # it is in bytes
        if sample_width != 2:
            raise ValueError(
                f"expected 16-bit samples, got sample width {sample_width}"
            )

        sample_rate = f.getframerate()
        raw_frames = f.readframes(f.getnframes())

    # WAV PCM data is little-endian; spell the byte order out so the result
    # does not depend on the host's native endianness.
    samples_int16 = np.frombuffer(raw_frames, dtype="<i2")
    samples_float32 = samples_int16.astype(np.float32) / 32768
    return samples_float32, sample_rate
|
19 |
+
|
20 |
+
def main():
    """Transcribe ``test.wav`` with the chunk64 transducer and print the result.

    Builds a streaming recognizer from the ``am-onnx/*.chunk64.onnx`` models,
    feeds it the whole file followed by two seconds of zeros, decodes until
    the recognizer reports no more ready frames, and prints the result.
    """
    asr = sherpa_onnx.OnlineRecognizer.from_transducer(
        encoder="am-onnx/encoder.chunk64.onnx",
        decoder="am-onnx/decoder.chunk64.onnx",
        joiner="am-onnx/joiner.chunk64.onnx",
        tokens="lang/tokens.txt",
        num_threads=4,
        sample_rate=16000,
        dither=3e-5,
        decoding_method="modified_beam_search",
        max_active_paths=10,
    )

    audio, rate = read_wave("test.wav")

    stream = asr.create_stream()
    stream.accept_waveform(rate, waveform=audio)

    # Append two seconds of silence after the audio.
    # NOTE(review): presumably this lets the streaming model flush its final
    # chunk -- confirm against the sherpa-onnx documentation.
    silence = np.zeros(int(rate * 2.0), dtype=np.float32)
    stream.accept_waveform(rate, waveform=silence)
    stream.input_finished()

    # Keep decoding for as long as the recognizer says the stream is ready.
    while asr.is_ready(stream):
        asr.decode_stream(stream)

    print(asr.get_result(stream))


if __name__ == "__main__":
    main()
|
47 |
+
|