#!/usr/bin/env python
import os
from python_speech_features import mfcc
from python_speech_features import delta
from python_speech_features import logfbank
import scipy.io.wavfile as wav
import pickle
import numpy as np
import matplotlib.pyplot as plt
# Directory that holds the WAV files to process.
rootdir = '...'
# Name of the pickle written into rootdir once all files are processed.
output_name = 'data.pkl'
# Maps each processed file name to its log filterbank feature array.
dic = {}

for name in os.listdir(rootdir):
    path = os.path.join(rootdir, name)
    # Skip sub-directories, and skip the pickle left behind by a previous run:
    # it lives in the same directory and is not audio, so wav.read would fail.
    if name == output_name or not os.path.isfile(path):
        continue

    print(name)
    rate, sig = wav.read(path)

    # NOTE(review): mfcc runs with its default nfft while logfbank below is
    # given nfft=551 -- confirm whether both calls should use the same value.
    mfcc_feat = mfcc(sig, rate)
    # Subtract the per-column mean (plus a small epsilon) in place.
    mfcc_feat -= (np.mean(mfcc_feat, axis=0) + 1e-8)
    # Plot only the first 25 rows; truncating keeps blank stretches of the
    # recording from dominating the heat map.
    mfcc_feat2 = mfcc_feat[0:25, :]

    fig, ax = plt.subplots()
    # Swap axes so the first axis of the MFCC array runs along the image's
    # x-axis and the second axis runs along its y-axis.
    mfcc_data = np.swapaxes(mfcc_feat2, 0, 1)
    ax.imshow(
        mfcc_data,
        cmap=plt.cm.jet,
        extent=[0, mfcc_data.shape[1], 0, mfcc_data.shape[0]],
        aspect='auto',
    )
    ax.set_title('MFCC')
    plt.show()
    # Release the figure so a large directory does not accumulate open figures.
    plt.close(fig)

    # Delta features of the full (untruncated) MFCCs; computed here but not
    # stored in the pickle.
    d_mfcc_feat = delta(mfcc_feat, 2)
    fbank_feat = logfbank(sig, rate, nfft=551)
    print(len(fbank_feat))
    dic[name] = fbank_feat

# The context manager guarantees the pickle is flushed and closed.
with open(os.path.join(rootdir, output_name), 'wb') as output:
    pickle.dump(dic, output)
重点想记录的是 MFCC 的画图。热力图最关键的一句是:plt.imshow(mfcc_data, cmap=plt.cm.jet, extent=[0, mfcc_data.shape[1], 0, mfcc_data.shape[0]], aspect='auto')。
如果音频文件本身有空白(静音)段,可能出现如下情况:
加了截断的代码:mfcc_feat2 = mfcc_feat[0:25,:]。
截断后只画前 25 帧,不显示空白部分,让整体更好看。
参考资料:
stackoverflow
Speech Processing for Machine Learning
python 实现MFCC