import numpy as np
from scipy.io import loadmat
# scipy.io.loadmat reads MATLAB .mat files into Python (scipy.io.savemat is the writer).
from scipy.signal import lfilter, hamming
#lfilter数字滤波器 hamming汉明窗
from scipy.fftpack import fft
#快速傅里叶变换之后生成复数
from scipy.fftpack.realtransforms import dct
# Discrete cosine transform (DCT): for most natural signals (audio, images) the energy is concentrated in the low-frequency DCT coefficients.
from scikits.talkbox import segment_axis
from mel import hz2mel
#从赫兹转换为梅尔刻度
def trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfilt, nlogfilt):
    """Compute the triangular filter bank used for MFCC.

    The filter edges are laid out as ``nlinfilt`` linearly spaced
    frequencies followed by geometrically spaced ones.

    Parameters
    ----------
    fs : number
        Sampling rate in Hz.
    nfft : int
        FFT length; each filter is described over ``nfft`` bins.
    lowfreq : number
        Frequency (Hz) of the first filter edge.
    linsc : number
        Spacing (Hz) between consecutive linearly spaced edges.
    logsc : number
        Ratio between consecutive geometrically spaced edges.
    nlinfilt : int
        Number of linearly spaced filters.
    nlogfilt : int
        Number of geometrically spaced filters.

    Returns
    -------
    fbank : ndarray, shape (nlinfilt + nlogfilt, nfft)
        Filter coefficients, one row per filter, indexed by FFT bin.
    freqs : ndarray, shape (nlinfilt + nlogfilt + 2,)
        Edge frequencies in Hz; filter ``i`` starts at ``freqs[i]``,
        peaks at ``freqs[i + 1]`` and ends at ``freqs[i + 2]``.

    Notes
    -----
    Bin indices are derived from ``freq * nfft / fs`` and used to index
    arrays of length ``nfft``, so the highest edge must stay below ``fs``.
    """
    # Total number of filters.
    nfilt = nlinfilt + nlogfilt

    # Start / centre / end frequencies of every triangle: the first
    # ``nlinfilt`` edges are linear, the remaining ones grow geometrically
    # from the last linear edge.
    freqs = np.zeros(nfilt + 2)
    freqs[:nlinfilt] = lowfreq + np.arange(nlinfilt) * linsc
    freqs[nlinfilt:] = freqs[nlinfilt - 1] * logsc ** np.arange(1, nlogfilt + 3)

    # Peak height of each triangle: 2 / (width of its base in Hz).
    heights = 2. / (freqs[2:] - freqs[:-2])

    # Filter coefficients in the FFT domain (one column per bin).
    fbank = np.zeros((nfilt, nfft))
    # Frequency in Hz of every FFT bin.
    nfreqs = np.arange(nfft) / (1. * nfft) * fs

    for i in range(nfilt):
        low = freqs[i]
        cen = freqs[i + 1]
        hi = freqs[i + 2]

        # First bin strictly above each edge. Plain ``int`` is used because
        # the ``np.int`` alias no longer exists in current NumPy releases.
        low_bin = int(np.floor(low * nfft / fs)) + 1
        cen_bin = int(np.floor(cen * nfft / fs)) + 1
        hi_bin = int(np.floor(hi * nfft / fs)) + 1

        # Bins on the rising and on the falling side of the triangle.
        lid = np.arange(low_bin, cen_bin, dtype=int)
        rid = np.arange(cen_bin, hi_bin, dtype=int)

        lslope = heights[i] / (cen - low)
        rslope = heights[i] / (hi - cen)

        fbank[i][lid] = lslope * (nfreqs[lid] - low)
        fbank[i][rid] = rslope * (hi - nfreqs[rid])

    return fbank, freqs
def mfcc(input, nwin=256, nfft=512, fs=16000, nceps=13):
    """Compute Mel-frequency cepstral coefficients (MFCC).

    Parameters
    ----------
    input : ndarray
        Signal from which the coefficients are computed.
    nwin : int
        Length of each analysis frame in samples.
    nfft : int
        FFT length applied to each frame.
    fs : int
        Sampling rate in Hz.
    nceps : int
        Number of cepstral coefficients kept per frame.

    Returns
    -------
    ceps : ndarray
        Mel cepstral coefficients (first ``nceps`` DCT coefficients).
    mspec : ndarray
        Log (base 10) spectrum in the mel domain.
    spec : ndarray
        Magnitude spectrum of the windowed frames.

    Notes
    -----
    The MFCC are computed as follows:
        * pre-processing in the time domain (pre-emphasis)
        * spectrum magnitude of Hamming-windowed frames
        * filtering of the spectrum with a bank of triangular filters
        * DCT of the log spectrum

    References
    ----------
    .. [1] S.B. Davis and P. Mermelstein, "Comparison of parametric
           representations for monosyllabic word recognition in continuously
           spoken sentences", IEEE Trans. Acoustics. Speech, Signal Proc.
           ASSP-28 (4): 357-366, August 1980."""
    # MFCC parameters: taken from the Auditory Toolbox.
    # Value handed to segment_axis as its third argument: nwin minus 160
    # samples.
    over = nwin - 160
    # Pre-emphasis factor (accounts for the -6 dB/octave roll-off of the
    # radiation at the lips).
    prefac = 0.97

    # lowfreq = 400 / 3.
    lowfreq = 133.33
    # highfreq = 6855.4976
    linsc = 200 / 3.
    logsc = 1.0711703

    nlinfil = 13
    nlogfil = 27

    # Hamming window built with sym=0 (non-symmetric variant).
    w = hamming(nwin, sym=0)

    # Only the coefficient matrix is needed; the edge frequencies are dropped.
    fbank = trfbank(fs, nfft, lowfreq, linsc, logsc, nlinfil, nlogfil)[0]

    # ------------------
    # Compute the MFCC
    # ------------------
    extract = preemp(input, prefac)
    framed = segment_axis(extract, nwin, over) * w

    # Spectrum magnitude of every frame.
    spec = np.abs(fft(framed, nfft, axis=-1))
    # Filter the spectrum through the triangular filter bank.
    mspec = np.log10(np.dot(spec, fbank.T))
    # Use the DCT to "compress" the coefficients (spectrum -> cepstrum domain).
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)[:, :nceps]

    return ceps, mspec, spec
def preemp(input, p):
    """Apply a pre-emphasis filter to a signal.

    The signal is run through ``scipy.signal.lfilter`` with numerator
    ``[1, -p]`` and denominator ``1``, i.e. ``y[n] = x[n] - p * x[n - 1]``.

    Parameters
    ----------
    input : array_like
        Signal to filter.
    p : float
        Pre-emphasis coefficient.

    Returns
    -------
    ndarray
        The filtered signal, as returned by ``lfilter``.
    """
    return lfilter([1., -p], 1, input)
if __name__ == '__main__':
    # Load the signal stored under the 'extract' key of extract.mat.
    extract = loadmat('extract.mat')['extract']
    # mfcc returns the tuple (ceps, mspec, spec); unpack it so that `ceps`
    # holds only the cepstral coefficients.
    ceps, mspec, spec = mfcc(extract)
# Study notes explaining scikits.talkbox.feature.mfcc.
# (Web page footer: latest recommended article published 2021-02-12 20:17:08.)