import numpy as np
from python_speech_features import mfcc # 需要pip install
import scipy.io.wavfile as wav
import os
def get_wavs_lables(wav_path, label_file, min_size=240000):
    """Pair wav files found under *wav_path* with transcripts from *label_file*.

    The label file is read as bytes, one entry per line, in the form
    ``<id> <text>``: the id is everything before the first space (decoded as
    ASCII) and the text is the remainder of the line (decoded as UTF-8).
    A wav file is matched to a label by the part of its base name that
    precedes the first ``.``.

    Args:
        wav_path: Directory that is walked recursively for ``.wav`` / ``.WAV``
            files.
        label_file: Path of the label file described above.
        min_size: Files whose size in bytes is below this value are ignored.
            Defaults to 240000, the threshold originally hard-coded here.

    Returns:
        A tuple ``(wav_files, labels)`` of two equally long lists, where
        ``labels[i]`` is the transcript of ``wav_files[i]``. Wav files that
        have no entry in the label file are left out.

    Raises:
        OSError: If a file cannot be stat-ed or the label file cannot be
            opened.
        UnicodeDecodeError: If an id is not ASCII or a text is not UTF-8.
    """
    # Collect the paths of the wav files usable for training.
    wav_files = []
    for dirpath, _dirnames, filenames in os.walk(wav_path):
        for filename in filenames:
            if not filename.endswith(('.wav', '.WAV')):
                continue
            filename_path = os.sep.join([dirpath, filename])
            # Weed out small files.
            if os.stat(filename_path).st_size < min_size:
                continue
            wav_files.append(filename_path)

    # Map each label id to its transcript.
    labels_dict = {}
    with open(label_file, 'rb') as f:
        for line in f:
            # Strip "\r" as well so CRLF files do not leave a stray carriage
            # return at the end of every transcript.
            line = line.rstrip(b'\r\n')
            label_id, sep, label_text = line.partition(b' ')
            if not sep:
                # Blank line or line without a space: there is no transcript
                # to record, so skip it instead of failing on the whole file.
                continue
            labels_dict[label_id.decode('ascii')] = label_text.decode('utf-8')

    # Keep only the wav files that have a label, preserving walk order.
    labels = []
    new_wav_files = []
    for wav_file in wav_files:
        wav_id = os.path.basename(wav_file).split('.')[0]
        if wav_id in labels_dict:
            labels.append(labels_dict[wav_id])
            new_wav_files.append(wav_file)
    return new_wav_files, labels