|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import numpy as np |
|
import sys, os, re, gzip, struct |
|
|
|
|
|
|
|
|
|
|
|
if not "KALDI_ROOT" in os.environ: |
|
|
|
os.environ["KALDI_ROOT"] = "/mnt/matylda5/iveselyk/Tools/kaldi-trunk" |
|
|
|
|
|
os.environ["PATH"] = ( |
|
os.popen( |
|
"echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/" |
|
) |
|
.readline() |
|
.strip() |
|
+ ":" |
|
+ os.environ["PATH"] |
|
) |
|
|
|
|
|
|
|
|
|
class UnsupportedDataType(Exception):
    """Raised when writing an array whose dtype the kaldi format does not support."""

    pass
|
|
|
|
|
class UnknownVectorHeader(Exception):
    """Raised when a binary vector has an unrecognized type header."""

    pass
|
|
|
|
|
class UnknownMatrixHeader(Exception):
    """Raised when a binary matrix has an unrecognized type header."""

    pass
|
|
|
|
|
class BadSampleSize(Exception):
    """Raised when the per-element sample size is neither 4 nor 8 bytes."""

    pass
|
|
|
|
|
class BadInputFormat(Exception):
    """Raised when the input stream does not match the expected kaldi format."""

    pass
|
|
|
|
|
class SubprocessFailed(Exception):
    """Raised (from the watcher thread) when a piped shell command exits non-zero."""

    pass
|
|
|
|
|
|
|
|
|
|
|
|
|
def open_or_fd(file, mode="rb"): |
|
"""fd = open_or_fd(file) |
|
Open file, gzipped file, pipe, or forward the file-descriptor. |
|
Eventually seeks in the 'file' argument contains ':offset' suffix. |
|
""" |
|
offset = None |
|
try: |
|
|
|
if re.search("^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:", file): |
|
(prefix, file) = file.split(":", 1) |
|
|
|
if re.search(":[0-9]+$", file): |
|
(file, offset) = file.rsplit(":", 1) |
|
|
|
if file[-1] == "|": |
|
fd = popen(file[:-1], "rb") |
|
|
|
elif file[0] == "|": |
|
fd = popen(file[1:], "wb") |
|
|
|
elif file.split(".")[-1] == "gz": |
|
fd = gzip.open(file, mode) |
|
|
|
else: |
|
fd = open(file, mode) |
|
except TypeError: |
|
|
|
fd = file |
|
|
|
if offset != None: |
|
fd.seek(int(offset)) |
|
return fd |
|
|
|
|
|
|
|
def popen(cmd, mode="rb"): |
|
if not isinstance(cmd, str): |
|
raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) |
|
|
|
import subprocess, io, threading |
|
|
|
|
|
def cleanup(proc, cmd): |
|
ret = proc.wait() |
|
if ret > 0: |
|
raise SubprocessFailed("cmd %s returned %d !" % (cmd, ret)) |
|
return |
|
|
|
|
|
if mode == "r": |
|
proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) |
|
threading.Thread(target=cleanup, args=(proc, cmd)).start() |
|
return io.TextIOWrapper(proc.stdout) |
|
elif mode == "w": |
|
proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) |
|
threading.Thread(target=cleanup, args=(proc, cmd)).start() |
|
return io.TextIOWrapper(proc.stdin) |
|
|
|
elif mode == "rb": |
|
proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) |
|
threading.Thread(target=cleanup, args=(proc, cmd)).start() |
|
return proc.stdout |
|
elif mode == "wb": |
|
proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE) |
|
threading.Thread(target=cleanup, args=(proc, cmd)).start() |
|
return proc.stdin |
|
|
|
else: |
|
raise ValueError("invalid mode %s" % mode) |
|
|
|
|
|
def read_key(fd):
    """[key] = read_key(fd)
    Read the utterance-key from the opened ark/stream descriptor 'fd'.
    Returns None at end-of-stream.
    """
    chars = []
    while True:
        char = fd.read(1).decode("latin1")
        if char == "":  # end of stream,
            break
        if char == " ":  # space terminates the key,
            break
        chars.append(char)
    # join once instead of quadratic string '+=',
    key = "".join(chars).strip()
    if key == "":
        return None  # end of file,
    # raw string: '\S' in a plain literal is an invalid escape sequence,
    assert re.match(r"^\S+$", key) is not None  # the key must be a single token,
    return key
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_ali_ark(file_or_fd):
    """Alias to 'read_vec_int_ark()' (kaldi alignments are integer vectors)."""
    return read_vec_int_ark(file_or_fd)
|
|
|
|
|
def read_vec_int_ark(file_or_fd):
    """generator(key,vec) = read_vec_int_ark(file_or_fd)
    Create generator of (key,vector<int>) tuples, which reads from the ark file/stream.
    file_or_fd : ark, gzipped ark, pipe or opened file descriptor.

    Read ark to a 'dictionary':
     d = { u:d for u,d in kaldi_io.read_vec_int_ark(file) }
    """
    fd = open_or_fd(file_or_fd)
    try:
        # keys and vectors alternate in the stream; a missing key means EOF,
        while True:
            utt = read_key(fd)
            if not utt:
                break
            yield utt, read_vec_int(fd)
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
def read_vec_int_scp(file_or_fd):
    """generator(key,vec) = read_vec_int_scp(file_or_fd)
    Returns generator of (key,vector<int>) tuples, read according to kaldi scp.
    file_or_fd : scp, gzipped scp, pipe or opened file descriptor.

    Iterate the scp:
     for key,vec in kaldi_io.read_vec_int_scp(file):
       ...

    Read scp to a 'dictionary':
     d = { key:vec for key,vec in kaldi_io.read_vec_int_scp(file) }
    """
    fd = open_or_fd(file_or_fd)
    try:
        for line in fd:
            # split on any whitespace: 'write_ark_scp' in this module emits
            # TAB-separated lines, classic kaldi scp uses a single space,
            (key, rxfile) = line.decode().split(None, 1)
            # strip the trailing newline, so plain filenames (no offset) open cleanly,
            vec = read_vec_int(rxfile.strip())
            yield key, vec
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
def read_vec_int(file_or_fd):
    """[int-vec] = read_vec_int(file_or_fd)
    Read kaldi integer vector, ascii or binary input,
    """
    fd = open_or_fd(file_or_fd)
    binary = fd.read(2).decode()
    if binary == "\0B":  # binary flag
        assert fd.read(1).decode() == "\4"  # int-size of the dimension (4 bytes),
        vec_size = np.frombuffer(fd.read(4), dtype="int32", count=1)[0]
        if vec_size == 0:
            # guard: 'vec[0]' below would raise IndexError on an empty vector,
            ans = np.array([], dtype="int32")
        else:
            # each element is stored as (int8 size-marker, int32 value) = 5 bytes,
            vec = np.frombuffer(
                fd.read(vec_size * 5),
                dtype=[("size", "int8"), ("value", "int32")],
                count=vec_size,
            )
            assert vec[0]["size"] == 4  # int32 elements expected,
            ans = vec[:]["value"]
    else:
        # ascii, e.g. '[ 1 2 3 ]',
        arr = (binary + fd.readline().decode()).strip().split()
        try:
            arr.remove("[")
            arr.remove("]")
        except ValueError:
            pass
        ans = np.array(arr, dtype=int)
    if fd is not file_or_fd:
        fd.close()
    return ans
|
|
|
|
|
|
|
def write_vec_int(file_or_fd, v, key=""):
    """write_vec_int(f, v, key='')
    Write a binary kaldi integer vector to filename or stream.
    Arguments:
     file_or_fd : filename or opened file descriptor for writing,
     v : the vector to be stored,
     key (optional) : used for writing ark-file, the utterance-id gets written before the vector.

    Example of writing single vector:
     kaldi_io.write_vec_int(filename, vec)

    Example of writing arkfile:
     with open(ark_file,'w') as f:
       for key,vec in dict.iteritems():
         kaldi_io.write_vec_flt(f, vec, key=key)
    """
    fd = open_or_fd(file_or_fd, mode="wb")
    if sys.version_info[0] == 3:
        # getattr: raw byte-streams such as io.BytesIO have no 'mode' attribute,
        assert getattr(fd, "mode", "wb") == "wb"
    try:
        if key != "":
            fd.write((key + " ").encode("latin1"))  # ark-files have keys (utterance-id),
        fd.write("\0B".encode())  # binary flag
        # dimension, '\4' marks that a 4-byte integer follows,
        fd.write("\4".encode())
        fd.write(struct.pack(np.dtype("int32").char, v.shape[0]))
        # data: each element prefixed by its 4-byte size marker,
        for x in v:
            fd.write("\4".encode())
            fd.write(struct.pack(np.dtype("int32").char, x))
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_vec_flt_scp(file_or_fd):
    """generator(key,vec) = read_vec_flt_scp(file_or_fd)
    Returns generator of (key,vector) tuples, read according to kaldi scp.
    file_or_fd : scp, gzipped scp, pipe or opened file descriptor.

    Iterate the scp:
     for key,vec in kaldi_io.read_vec_flt_scp(file):
       ...

    Read scp to a 'dictionary':
     d = { key:vec for key,vec in kaldi_io.read_vec_flt_scp(file) }
    """
    fd = open_or_fd(file_or_fd)
    try:
        for line in fd:
            # split on any whitespace: 'write_ark_scp' in this module emits
            # TAB-separated lines, classic kaldi scp uses a single space,
            (key, rxfile) = line.decode().split(None, 1)
            # strip the trailing newline, so plain filenames (no offset) open cleanly,
            vec = read_vec_flt(rxfile.strip())
            yield key, vec
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
def read_vec_flt_ark(file_or_fd):
    """generator(key,vec) = read_vec_flt_ark(file_or_fd)
    Create generator of (key,vector<float>) tuples, reading from an ark file/stream.
    file_or_fd : ark, gzipped ark, pipe or opened file descriptor.

    Read ark to a 'dictionary':
     d = { u:d for u,d in kaldi_io.read_vec_flt_ark(file) }
    """
    fd = open_or_fd(file_or_fd)
    try:
        # keys and vectors alternate in the stream; a missing key means EOF,
        while True:
            utt = read_key(fd)
            if not utt:
                break
            yield utt, read_vec_flt(fd)
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
def read_vec_flt(file_or_fd):
    """[flt-vec] = read_vec_flt(file_or_fd)
    Read kaldi float vector, ascii or binary input,
    """
    fd = open_or_fd(file_or_fd)
    # try/finally: the original binary branch returned early and leaked 'fd',
    try:
        binary = fd.read(2).decode()
        if binary == "\0B":  # binary flag
            # data-type header: float32 ('FV ') or float64 ('DV '),
            header = fd.read(3).decode()
            if header == "FV ":
                sample_size = 4
            elif header == "DV ":
                sample_size = 8
            else:
                raise UnknownVectorHeader("The header contained '%s'" % header)
            assert sample_size > 0
            assert fd.read(1).decode() == "\4"  # int-size of the dimension,
            vec_size = np.frombuffer(fd.read(4), dtype="int32", count=1)[0]
            buf = fd.read(vec_size * sample_size)
            if sample_size == 4:
                ans = np.frombuffer(buf, dtype="float32")
            else:
                ans = np.frombuffer(buf, dtype="float64")
        else:
            # ascii, e.g. '[ 1.0 2.0 ]',
            arr = (binary + fd.readline().decode()).strip().split()
            try:
                arr.remove("[")
                arr.remove("]")
            except ValueError:
                pass
            ans = np.array(arr, dtype=float)
    finally:
        if fd is not file_or_fd:
            fd.close()
    return ans
|
|
|
|
|
|
|
def write_vec_flt(file_or_fd, v, key=""):
    """write_vec_flt(f, v, key='')
    Write a binary kaldi vector to filename or stream. Supports 32bit and 64bit floats.
    Arguments:
     file_or_fd : filename or opened file descriptor for writing,
     v : the vector to be stored,
     key (optional) : used for writing ark-file, the utterance-id gets written before the vector.

    Example of writing single vector:
     kaldi_io.write_vec_flt(filename, vec)

    Example of writing arkfile:
     with open(ark_file,'w') as f:
       for key,vec in dict.iteritems():
         kaldi_io.write_vec_flt(f, vec, key=key)
    """
    fd = open_or_fd(file_or_fd, mode="wb")
    if sys.version_info[0] == 3:
        # getattr: raw byte-streams such as io.BytesIO have no 'mode' attribute,
        assert getattr(fd, "mode", "wb") == "wb"
    try:
        if key != "":
            fd.write((key + " ").encode("latin1"))  # ark-files have keys (utterance-id),
        fd.write("\0B".encode())  # binary flag
        # data-type header,
        if v.dtype == "float32":
            fd.write("FV ".encode())
        elif v.dtype == "float64":
            fd.write("DV ".encode())
        else:
            raise UnsupportedDataType(
                "'%s', please use 'float32' or 'float64'" % v.dtype
            )
        # dimension, '\04' marks that a 4-byte integer follows,
        fd.write("\04".encode())
        fd.write(struct.pack(np.dtype("uint32").char, v.shape[0]))  # dim
        # data,
        fd.write(v.tobytes())
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_mat_scp(file_or_fd):
    """generator(key,mat) = read_mat_scp(file_or_fd)
    Returns generator of (key,matrix) tuples, read according to kaldi scp.
    file_or_fd : scp, gzipped scp, pipe or opened file descriptor.

    Iterate the scp:
     for key,mat in kaldi_io.read_mat_scp(file):
       ...

    Read scp to a 'dictionary':
     d = { key:mat for key,mat in kaldi_io.read_mat_scp(file) }
    """
    fd = open_or_fd(file_or_fd)
    try:
        for line in fd:
            # split on any whitespace: 'write_ark_scp' in this module emits
            # TAB-separated lines, classic kaldi scp uses a single space,
            (key, rxfile) = line.decode().split(None, 1)
            # strip the trailing newline, so plain filenames (no offset) open cleanly,
            mat = read_mat(rxfile.strip())
            yield key, mat
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
def read_mat_ark(file_or_fd):
    """generator(key,mat) = read_mat_ark(file_or_fd)
    Returns generator of (key,matrix) tuples, read from ark file/stream.
    file_or_fd : scp, gzipped scp, pipe or opened file descriptor.

    Iterate the ark:
     for key,mat in kaldi_io.read_mat_ark(file):
       ...

    Read ark to a 'dictionary':
     d = { key:mat for key,mat in kaldi_io.read_mat_ark(file) }
    """
    fd = open_or_fd(file_or_fd)
    try:
        # keys and matrices alternate in the stream; a missing key means EOF,
        while True:
            utt = read_key(fd)
            if not utt:
                break
            yield utt, read_mat(fd)
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
def read_mat(file_or_fd):
    """[mat] = read_mat(file_or_fd)
    Reads single kaldi matrix, supports ascii and binary.
    file_or_fd : file, gzipped file, pipe or opened file descriptor.
    """
    fd = open_or_fd(file_or_fd)
    try:
        # the first two bytes decide between binary ('\0B') and ascii (' [') storage,
        peek = fd.read(2).decode()
        if peek == "\0B":
            return _read_mat_binary(fd)
        assert peek == " ["
        return _read_mat_ascii(fd)
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
def _read_mat_binary(fd): |
|
|
|
header = fd.read(3).decode() |
|
|
|
if header.startswith("CM"): |
|
return _read_compressed_mat(fd, header) |
|
elif header == "FM ": |
|
sample_size = 4 |
|
elif header == "DM ": |
|
sample_size = 8 |
|
else: |
|
raise UnknownMatrixHeader("The header contained '%s'" % header) |
|
assert sample_size > 0 |
|
|
|
s1, rows, s2, cols = np.frombuffer( |
|
fd.read(10), dtype="int8,int32,int8,int32", count=1 |
|
)[0] |
|
|
|
buf = fd.read(rows * cols * sample_size) |
|
if sample_size == 4: |
|
vec = np.frombuffer(buf, dtype="float32") |
|
elif sample_size == 8: |
|
vec = np.frombuffer(buf, dtype="float64") |
|
else: |
|
raise BadSampleSize |
|
mat = np.reshape(vec, (rows, cols)) |
|
return mat |
|
|
|
|
|
def _read_mat_ascii(fd): |
|
rows = [] |
|
while 1: |
|
line = fd.readline().decode() |
|
if len(line) == 0: |
|
raise BadInputFormat |
|
if len(line.strip()) == 0: |
|
continue |
|
arr = line.strip().split() |
|
if arr[-1] != "]": |
|
rows.append(np.array(arr, dtype="float32")) |
|
else: |
|
rows.append(np.array(arr[:-1], dtype="float32")) |
|
mat = np.vstack(rows) |
|
return mat |
|
|
|
|
|
def _read_compressed_mat(fd, format):
    """Read a compressed matrix,
    see: https://github.com/kaldi-asr/kaldi/blob/master/src/matrix/compressed-matrix.h
    methods: CompressedMatrix::Read(...), CompressedMatrix::CopyToMat(...),
    """
    assert format == "CM "  # only the basic 'CM ' format is handled here,

    # global header: quantization range shared by the whole matrix + dimensions,
    global_header = np.dtype(
        [
            ("minvalue", "float32"),
            ("range", "float32"),
            ("num_rows", "int32"),
            ("num_cols", "int32"),
        ]
    )
    # per-column header: 4 quantized percentiles (uint16 within the global range),
    per_col_header = np.dtype(
        [
            ("percentile_0", "uint16"),
            ("percentile_25", "uint16"),
            ("percentile_75", "uint16"),
            ("percentile_100", "uint16"),
        ]
    )

    # dequantize a uint16 percentile into the global [min, min+range] interval;
    # 1.52590218966964e-05 is 1/65535, the uint16 quantization step,
    def uint16_to_float(value, min, range):
        return np.float32(min + range * 1.52590218966964e-05 * value)

    # dequantize a column of uint8 codes using its 4 percentile anchors:
    # codes 0..64 map to [p0,p25], 65..192 to [p25,p75], 193..255 to [p75,p100],
    def uint8_to_float_v2(vec, p0, p25, p75, p100):
        mask_0_64 = vec <= 64
        mask_193_255 = vec > 192
        mask_65_192 = ~(mask_0_64 | mask_193_255)

        ans = np.empty(len(vec), dtype="float32")
        ans[mask_0_64] = p0 + (p25 - p0) / 64.0 * vec[mask_0_64]
        ans[mask_65_192] = p25 + (p75 - p25) / 128.0 * (vec[mask_65_192] - 64)
        ans[mask_193_255] = p75 + (p100 - p75) / 63.0 * (vec[mask_193_255] - 192)
        return ans

    globmin, globrange, rows, cols = np.frombuffer(
        fd.read(16), dtype=global_header, count=1
    )[0]

    # the compressed payload is stored column-major: first all per-column
    # headers (8 bytes each), then one uint8 code per cell,
    col_headers = np.frombuffer(fd.read(cols * 8), dtype=per_col_header, count=cols)
    data = np.reshape(
        np.frombuffer(fd.read(cols * rows), dtype="uint8", count=cols * rows),
        newshape=(cols, rows),
    )

    # dequantize column by column, building a (cols, rows) matrix,
    mat = np.empty((cols, rows), dtype="float32")
    for i, col_header in enumerate(col_headers):
        col_header_flt = [
            uint16_to_float(percentile, globmin, globrange) for percentile in col_header
        ]
        mat[i] = uint8_to_float_v2(data[i], *col_header_flt)

    # transpose back to the usual (rows, cols) layout,
    return mat.T
|
|
|
|
|
def write_ark_scp(key, mat, ark_fout, scp_out):
    """Write 'mat' under 'key' into the ark stream and append the matching
    '<key>\\t<ark-name>:<offset>' line to the scp stream."""
    offset = write_mat(ark_fout, mat, key)
    scp_out.write("{}\t{}:{}".format(key, ark_fout.name, offset))
    scp_out.write("\n")
|
|
|
|
|
|
|
def write_mat(file_or_fd, m, key=""):
    """write_mat(f, m, key='')
    Write a binary kaldi matrix to filename or stream. Supports 32bit and 64bit floats.
    Arguments:
     file_or_fd : filename of opened file descriptor for writing,
     m : the matrix to be stored,
     key (optional) : used for writing ark-file, the utterance-id gets written before the matrix.

    Returns the byte-offset of the matrix data (position right after the key),
    usable for building scp lines.
    NOTE(review): the offset comes from fd.tell(), which needs a seekable
    output (plain file, io.BytesIO) — confirm before using with pipe outputs.

    Example of writing single matrix:
     kaldi_io.write_mat(filename, mat)

    Example of writing arkfile:
     with open(ark_file,'w') as f:
       for key,mat in dict.iteritems():
         kaldi_io.write_mat(f, mat, key=key)
    """
    mat_offset = 0
    fd = open_or_fd(file_or_fd, mode="wb")
    if sys.version_info[0] == 3:
        # getattr: raw byte-streams such as io.BytesIO have no 'mode' attribute,
        assert getattr(fd, "mode", "wb") == "wb"
    try:
        if key != "":
            fd.write((key + " ").encode("latin1"))  # ark-files have keys (utterance-id),
        mat_offset = fd.tell()  # scp entries point just past the key,
        fd.write("\0B".encode())  # binary flag
        # data-type header,
        if m.dtype == "float32":
            fd.write("FM ".encode())
        elif m.dtype == "float64":
            fd.write("DM ".encode())
        else:
            raise UnsupportedDataType(
                "'%s', please use 'float32' or 'float64'" % m.dtype
            )
        # dimensions, '\04' marks that a 4-byte integer follows,
        fd.write("\04".encode())
        fd.write(struct.pack(np.dtype("uint32").char, m.shape[0]))  # rows
        fd.write("\04".encode())
        fd.write(struct.pack(np.dtype("uint32").char, m.shape[1]))  # cols
        # data,
        fd.write(m.tobytes())
    finally:
        if fd is not file_or_fd:
            fd.close()
    return mat_offset
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_cnet_ark(file_or_fd):
    """Alias of function 'read_post_ark()', 'cnet' = confusion network"""
    return read_post_ark(file_or_fd)
|
|
|
|
|
def read_post_ark(file_or_fd):
    """generator(key,vec<vec<int,float>>) = read_post_ark(file)
    Returns generator of (key,posterior) tuples, read from ark file.
    file_or_fd : ark, gzipped ark, pipe or opened file descriptor.

    Iterate the ark:
     for key,post in kaldi_io.read_post_ark(file):
       ...

    Read ark to a 'dictionary':
     d = { key:post for key,post in kaldi_io.read_post_ark(file) }
    """
    fd = open_or_fd(file_or_fd)
    try:
        # keys and posteriors alternate in the stream; a missing key means EOF,
        while True:
            utt = read_key(fd)
            if not utt:
                break
            yield utt, read_post(fd)
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
def read_post(file_or_fd):
    """[post] = read_post(file_or_fd)
    Reads single kaldi 'Posterior' in binary format.

    The 'Posterior' is C++ type 'vector<vector<tuple<int,float> > >',
    the outer-vector is usually time axis, inner-vector are the records
    at given time, and the tuple is composed of an 'index' (integer)
    and a 'float-value'. The 'float-value' can represent a probability
    or any other numeric value.

    Returns vector of vectors of tuples.
    """
    fd = open_or_fd(file_or_fd)
    ans = []
    binary = fd.read(2).decode()
    assert binary == "\0B"  # binary flag
    assert fd.read(1).decode() == "\4"  # int-size of the dimension,
    outer_vec_size = np.frombuffer(fd.read(4), dtype="int32", count=1)[0]  # time axis,

    for i in range(outer_vec_size):
        assert fd.read(1).decode() == "\4"  # int-size of the inner dimension,
        inner_vec_size = np.frombuffer(fd.read(4), dtype="int32", count=1)[0]
        if inner_vec_size == 0:
            # guard: 'data[0]' below would raise IndexError on an empty record,
            ans.append([])
            continue
        # each record: (int8 size, int32 idx, int8 size, float32 post) = 10 bytes,
        data = np.frombuffer(
            fd.read(inner_vec_size * 10),
            dtype=[
                ("size_idx", "int8"),
                ("idx", "int32"),
                ("size_post", "int8"),
                ("post", "float32"),
            ],
            count=inner_vec_size,
        )
        assert data[0]["size_idx"] == 4
        assert data[0]["size_post"] == 4
        ans.append(data[["idx", "post"]].tolist())

    if fd is not file_or_fd:
        fd.close()
    return ans
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_cntime_ark(file_or_fd):
    """generator(key,vec<tuple<float,float>>) = read_cntime_ark(file_or_fd)
    Returns generator of (key,cntime) tuples, read from ark file.
    file_or_fd : file, gzipped file, pipe or opened file descriptor.

    Iterate the ark:
     for key,time in kaldi_io.read_cntime_ark(file):
       ...

    Read ark to a 'dictionary':
     d = { key:time for key,time in kaldi_io.read_cntime_ark(file) }
    """
    fd = open_or_fd(file_or_fd)
    try:
        # keys and time-info records alternate; a missing key means EOF,
        while True:
            utt = read_key(fd)
            if not utt:
                break
            yield utt, read_cntime(fd)
    finally:
        if fd is not file_or_fd:
            fd.close()
|
|
|
|
|
def read_cntime(file_or_fd):
    """[cntime] = read_cntime(file_or_fd)
    Reads single kaldi 'Confusion Network time info', in binary format:
    C++ type: vector<tuple<float,float> >.
    (begin/end times of bins at the confusion network).

    Binary layout is '<num-bins> <beg1> <end1> <beg2> <end2> ...'

    file_or_fd : file, gzipped file, pipe or opened file descriptor.

    Returns vector of tuples.
    """
    fd = open_or_fd(file_or_fd)
    binary = fd.read(2).decode()
    assert binary == "\0B"  # assuming binary,

    assert fd.read(1).decode() == "\4"  # int-size of the dimension,
    vec_size = np.frombuffer(fd.read(4), dtype="int32", count=1)[0]  # number of bins,

    if vec_size == 0:
        # guard: 'data[0]' below would raise IndexError on an empty vector,
        ans = []
    else:
        # each bin: (int8 size, float32 t_beg, int8 size, float32 t_end) = 10 bytes,
        data = np.frombuffer(
            fd.read(vec_size * 10),
            dtype=[
                ("size_beg", "int8"),
                ("t_beg", "float32"),
                ("size_end", "int8"),
                ("t_end", "float32"),
            ],
            count=vec_size,
        )
        assert data[0]["size_beg"] == 4
        assert data[0]["size_end"] == 4
        ans = data[["t_beg", "t_end"]].tolist()

    if fd is not file_or_fd:
        fd.close()
    return ans
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_segments_as_bool_vec(segments_file):
    """[ bool_vec ] = read_segments_as_bool_vec(segments_file)
    using kaldi 'segments' file for 1 wav, format : '<utt> <rec> <t-beg> <t-end>'
    - t-beg, t-end is in seconds,
    - assumed 100 frames/second,
    """
    segs = np.loadtxt(segments_file, dtype="object,object,f,f", ndmin=1)

    assert len(segs) > 0  # empty segments are invalid,
    # all the segments must belong to one single recording,
    assert (
        len(np.unique([s[1] for s in segs])) == 1
    )

    # convert seconds to frame indices (100 frames/second),
    beg_frm = np.rint([100 * s[2] for s in segs]).astype(int)
    end_frm = np.rint([100 * s[3] for s in segs]).astype(int)

    # run-length expansion: alternating False (gap) / True (speech) runs,
    # with a terminating zero-length False run,
    pattern = np.r_[np.tile([False, True], len(end_frm)), False]
    run_lens = np.r_[
        np.c_[beg_frm - np.r_[0, end_frm[:-1]], end_frm - beg_frm].flat, 0
    ]
    frms = np.repeat(pattern, run_lens)

    # sanity check: number of True frames equals the total segment length,
    assert np.sum(end_frm - beg_frm) == np.sum(frms)
    return frms
|
|