# 文件下载地址:
# 链接: https://pan.baidu.com/s/1lJfh1HyEQ2dRSbAhVoJNSA 提取码: dj6q
"""
语音识别
"""
import numpy as np
import scipy.io.wavfile as wf
import python_speech_features as sf
import sklearn.svm as svm
import sklearn.metrics as sm
import sklearn.preprocessing as sp
import os
# 整理样本
def search_files(directory):
    """Collect every ``.wav`` file under *directory*, grouped by folder name.

    The tree is walked recursively with ``os.walk``. Each file whose name
    ends with ``".wav"`` is filed under the name of the folder that directly
    contains it; that folder name is used as the sample's label.

    Args:
        directory: Root folder to scan.

    Returns:
        A dict mapping label -> list of file paths, e.g.
        ``{"apple": [path, path, ...], "kiwi": [path, path, ...]}``.
        The dict is empty when no ``.wav`` file is found.
    """
    files_dict = {}
    for cur_dir, _sub_dirs, files in os.walk(directory):
        wav_paths = [
            os.path.join(cur_dir, name)
            for name in files
            if name.endswith(".wav")
        ]
        if not wav_paths:
            continue
        # normpath strips a trailing separator and unifies mixed separators,
        # so basename always yields the containing folder's name. Splitting
        # on os.path.sep returned "" for a root passed as "dir/".
        label = os.path.basename(os.path.normpath(cur_dir))
        files_dict.setdefault(label, []).extend(wav_paths)
    return files_dict
def files_mfc(file_urls):
    """Turn labelled wav files into one averaged MFCC vector per file.

    Each file is read with ``scipy.io.wavfile.read``, converted to MFCC
    (Mel-frequency cepstral coefficient) frames with
    ``python_speech_features.mfcc``, and the frames are averaged along
    axis 0 so that every recording contributes a single feature vector.

    Args:
        file_urls: Dict mapping label -> iterable of wav file paths.

    Returns:
        A tuple ``(x_data, y_data)``: ``x_data`` is a NumPy array built from
        the per-file feature vectors, and ``y_data`` is a list holding the
        label of each vector in the same order.
    """
    features = []
    labels = []
    for label, wav_paths in file_urls.items():
        for wav_path in wav_paths:
            sample_rate, signal = wf.read(wav_path)
            frames = sf.mfcc(signal, sample_rate)
            # Average over the frame axis: one vector per recording.
            features.append(np.mean(frames, axis=0))
            labels.append(label)
    return np.array(features), labels
# Locate the training recordings, grouped by label.
train_urls = search_files("./speeches/training")
print(train_urls)
# Build the training feature matrix and the matching label list.
train_x, train_y = files_mfc(train_urls)
# Encode the string labels as integers for the classifier.
encoder = sp.LabelEncoder()
train_y_label = encoder.fit_transform(train_y)
print(train_x.shape, train_y_label.shape)
# Create the SVC model and train it; probability=True is required for the
# predict_proba call further down.
model = svm.SVC(kernel="poly", degree=2, gamma="auto", probability=True)
model.fit(train_x, train_y_label)
# Locate the test recordings.
test_urls = search_files("./speeches/testing")
print(test_urls)
# Build the test set, reusing the encoder fitted on the training labels.
test_x, test_y = files_mfc(test_urls)
test_y_label = encoder.transform(test_y)
# Predict the test set and print the classification report.
prd_test_y = model.predict(test_x)
print(sm.classification_report(test_y_label, prd_test_y))
# Print the per-class confidence probabilities.
probs = model.predict_proba(test_x)
print(np.round(probs, 3))
# Print each predicted label with the probability of that same class.
# SVC.predict_proba may disagree with SVC.predict, so probs.max(axis=1) can
# belong to a different class than the predicted one; look up the column of
# the predicted class instead.
pred_cols = np.searchsorted(model.classes_, prd_test_y)
pred_probs = probs[np.arange(len(prd_test_y)), pred_cols]
for label, prob in zip(encoder.inverse_transform(prd_test_y), pred_probs):
    print(label, np.round(prob, 3))