此库可用作“Kaldi I/O C++ API”:可以读写“.ark, .scp”格式,还可以使用 kaldi::[Matrix | Vector | ..]
编译要求:
cmake >= 3.0
数学库:mkl(推荐)。请先安装 conda,并使用它来安装 mkl(conda install mkl;
默认情况下,mkl 会随 conda 一起安装)。
运行 cmake 之前,
应确保 conda 已经安装好。
cd kaldi-io
mkdir build && cd build
cmake -DCMAKE_INSTALL_PREFIX=.. .. # set install prefix as '../kaldi-io'
make
make install
结果:
- kaldi-io/lib:
- libkaldi_io_static.a
- libkaldi_io_shared.so
- kaldi-io/include:
- kaldi-io.h(头文件,可以根据需要进行修改)
- 子头文件目录...
其他:
- 数学库依赖由 cmake 的 find_package 从系统路径自动解析
  (cmake/Modules/FindBLAS.cmake)- 优先选用 MKL / ATLAS / Accelerate(osx)
- 如果要设置特定的数学库:
# 确保构建目录是干净的
cmake -DBLAS_VENDORS=[ATLAS|MKL|OPEN|..] ..
# 另外,可以设置自定义数学库搜索路径,例如:
cmake -DBLAS_VENDORS=ATLAS -DBLAS_ATLAS_LIB_DIRS=.../atlas/build/lib ..
cmake -DBLAS_VENDORS=MKL -DBLAS_MKL_LIB_DIRS=/opt/intel/mkl/lib/intel64 ..
可以通过kaldi_io.py读取kaldi特征
#!/usr/bin/env python # -*- coding: utf-8 -*- from __future__ import print_function from __future__ import division import numpy as np import sys, os, re, gzip, struct # Adding kaldi tools to shell path, # Select kaldi, if not 'KALDI_ROOT' in os.environ: # Default! To change run python with 'export KALDI_ROOT=/some_dir python' os.environ['KALDI_ROOT']='/mnt/matylda5/iveselyk/Tools/kaldi-trunk' # Add kaldi tools to path, path = os.popen('echo $KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI_ROOT/src/fstbin/:$KALDI_ROOT/src/gmmbin/:$KALDI_ROOT/src/featbin/:$KALDI_ROOT/src/lm/:$KALDI_ROOT/src/sgmmbin/:$KALDI_ROOT/src/sgmm2bin/:$KALDI_ROOT/src/fgmmbin/:$KALDI_ROOT/src/latbin/:$KALDI_ROOT/src/nnetbin:$KALDI_ROOT/src/nnet2bin:$KALDI_ROOT/src/nnet3bin:$KALDI_ROOT/src/online2bin/:$KALDI_ROOT/src/ivectorbin/:$KALDI_ROOT/src/lmbin/') os.environ['PATH'] = path.readline().strip() + ':' + os.environ['PATH'] path.close() # Define all custom exceptions, class UnsupportedDataType(Exception): pass class UnknownVectorHeader(Exception): pass class UnknownMatrixHeader(Exception): pass class BadSampleSize(Exception): pass class BadInputFormat(Exception): pass class SubprocessFailed(Exception): pass # Data-type independent helper functions, def open_or_fd(file, mode='rb'): """ fd = open_or_fd(file) Open file, gzipped file, pipe, or forward the file-descriptor. Eventually seeks in the 'file' argument contains ':offset' suffix. """ offset = None try: # strip 'ark:' prefix from r{x,w}filename (optional), if re.search('^(ark|scp)(,scp|,b|,t|,n?f|,n?p|,b?o|,n?s|,n?cs)*:', file): (prefix,file) = file.split(':',1) # separate offset from filename (optional), if re.search(':[0-9]+$', file): (file,offset) = file.rsplit(':',1) # input pipe? if file[-1] == '|': fd = popen(file[:-1], 'rb') # custom, # output pipe? elif file[0] == '|': fd = popen(file[1:], 'wb') # custom, # is it gzipped? elif file.split('.')[-1] == 'gz': fd = gzip.open(file, mode) # a normal file... 
else: fd = open(file, mode) except TypeError: # 'file' is opened file descriptor, fd = file # Eventually seek to offset, if offset != None: fd.seek(int(offset)) return fd # based on '/usr/local/lib/python3.6/os.py' def popen(cmd, mode="rb"): if not isinstance(cmd, str): raise TypeError("invalid cmd type (%s, expected string)" % type(cmd)) import subprocess, io, threading # cleanup function for subprocesses, def cleanup(proc, cmd): ret = proc.wait() if ret > 0: raise SubprocessFailed('cmd %s returned %d !' % (cmd,ret)) return # text-mode, if mode == "r": proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=sys.stderr) threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, return io.TextIOWrapper(proc.stdout) elif mode == "w": proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stderr=sys.stderr) threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, return io.TextIOWrapper(proc.stdin) # binary, elif mode == "rb": proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=sys.stderr) threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, return proc.stdout elif mode == "wb": proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stderr=sys.stderr) threading.Thread(target=cleanup,args=(proc,cmd)).start() # clean-up thread, return proc.stdin # sanity, else: raise ValueError("invalid