import os
import numpy as np
from scipy.io import wavfile # 仅限于wav音频文件
from hmmlearn import hmm
from python_speech_features import mfcc
# For background on building the HMM class, see: https://www.cnblogs.com/pinard/p/7001397.html
# For background on MFCC, see: https://blog.csdn.net/xmdxcsj/article/details/51228791
class HMMTrainer(object):
    """Wrap a single hmmlearn HMM so it can be trained and scored.

    Only ``model_name='GaussianHMM'`` is accepted; any other value makes the
    constructor raise ``TypeError``.

    Attributes:
        model_name: Name of the HMM flavour requested by the caller.
        n_components: Number of hidden states passed to the HMM.
        cov_type: Value forwarded as ``covariance_type`` to the HMM.
        n_iter: Maximum number of training iterations passed to the HMM.
        models: List that receives the return value of every ``fit`` call.
        model: The underlying ``hmm.GaussianHMM`` instance.
    """

    def __init__(self, model_name='GaussianHMM', n_components=4,
                 cov_type='diag', n_iter=1000):
        """Store the hyper-parameters and build the underlying HMM.

        Raises:
            TypeError: If ``model_name`` is not ``'GaussianHMM'``.
        """
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        self.models = []

        if self.model_name == 'GaussianHMM':
            self.model = hmm.GaussianHMM(n_components=self.n_components,
                                         covariance_type=self.cov_type,
                                         n_iter=self.n_iter)
        else:
            raise TypeError('Invalid model type')

    def train(self, X):
        """Fit the HMM on ``X`` and record the fitted model in ``self.models``.

        Args:
            X: Training observations. The original note describes a 2-D numpy
                array with 13 columns per row (one row per MFCC frame).
        """
        # NOTE: np.seterr changes NumPy's floating-point error handling for
        # the whole process, not just for this call.
        np.seterr(all='ignore')
        self.models.append(self.model.fit(X))

    def get_score(self, input_data):
        """Return the score the underlying model assigns to ``input_data``."""
        return self.model.score(input_data)
if __name__ == '__main__':
    # Script entry point: train one HMM per class sub-folder, then classify a
    # fixed list of test recordings by picking the best-scoring model.
    import argparse

    parser = argparse.ArgumentParser(
        description='Train one HMM per audio class and classify test files')
    parser.add_argument(
        '--input-folder', dest='input_folder', required=True,
        help='Folder containing one sub-folder per class; each sub-folder '
             'is named after the class and holds its .wav recordings')
    args = parser.parse_args()

    # Input folder: each sub-folder is one class and its name is the label.
    input_folder = args.input_folder

    hmm_models = []

    # Walk the input folder.
    for dirname in os.listdir(input_folder):
        subfolder = os.path.join(input_folder, dirname)

        # Skip anything that is not a directory.
        if not os.path.isdir(subfolder):
            continue

        # The sub-folder name is the label (no manual '/' parsing, so this
        # also works with non-POSIX path separators).
        label = dirname

        # Sort so that the held-out file is deterministic; os.listdir order is
        # arbitrary. NOTE(review): presumably the last file in sorted order is
        # the one listed in input_files below -- verify against the dataset.
        wav_names = sorted(
            x for x in os.listdir(subfolder) if x.endswith('.wav'))

        # Use every file except the last one for training.
        features = []
        for filename in wav_names[:-1]:
            filepath = os.path.join(subfolder, filename)
            sampling_freq, audio = wavfile.read(filepath)
            # Extract MFCC (Mel-frequency cepstral coefficient) features.
            features.append(mfcc(audio, sampling_freq))

        # Nothing to train on (fewer than two .wav files): skip this class
        # instead of failing inside fit().
        if not features:
            print('Skipping %s: not enough .wav files to train on' % label)
            continue

        # Stack the per-file feature matrices row-wise in one go.
        X = np.concatenate(features, axis=0)
        print('X.shape = %s' % (X.shape,))

        # Train a model for this class and remember it with its label.
        hmm_trainer = HMMTrainer()
        hmm_trainer.train(X)
        hmm_models.append((hmm_trainer, label))

    # Test files.
    input_files = [
        'data/pineapple/pineapple15.wav',
        'data/orange/orange15.wav',
        'data/apple/apple15.wav',
        'data/kiwi/kiwi15.wav'
    ]

    for input_file in input_files:
        # sampling_freq is the sampling frequency of the recording.
        sampling_freq, audio = wavfile.read(input_file)

        # Extract MFCC features.
        mfcc_features = mfcc(audio, sampling_freq)

        # The label of the highest-scoring model is the prediction.
        max_score = None
        output_label = None
        for hmm_model, label in hmm_models:
            score = hmm_model.get_score(mfcc_features)
            # Explicit None check: comparing a number with None raises
            # TypeError on Python 3.
            if max_score is None or score > max_score:
                max_score = score
                output_label = label

        # The true label is the path component between the first and the
        # last '/' of the test file path.
        true_label = input_file[input_file.find('/') + 1:input_file.rfind('/')]
        print('\nTrue: %s' % true_label)
        print('Predicted: %s' % output_label)
# Please credit the source when reposting.