import os
import wavio
import numpy as np
import math
from matplotlib import pyplot as plt
from scipy.fftpack import dct
from python_speech_features import mfcc, delta, logfbank
# Read the speech data; main routine (读取语音数据及主函数)
# Main script loop: for every entry in `wavs`, read the file with wavio,
# normalise the samples, split them into frames, allocate the per-frame
# output arrays, then run an FFT on each windowed frame.
#
# NOTE(review): indentation was lost in this paste -- every line below is
# presumably part of the `for wav in wavs:` body; confirm against the
# original script. `wavs`, `data_dir`, `frames_crop`, `choose_windows`,
# `log_data` and `mel_filters` are not defined in the visible part of the file.
# NOTE(review): several trailing comments (e.g. `#frames`, `#add window`,
# `#parameters`, `#fft`) look like section headers for the *following* line
# that got fused onto the end of the preceding one -- confirm.
for wav in wavs:
# Build the path of the current file and load it.
wav_dir = os.path.join(data_dir, wav)
wav_data = wavio.read(wav_dir)
# Pull the sample array, sample rate and sample width off the wavio result.
data = wav_data.data
sample_rate = wav_data.rate #16k
# `sampwidth` is not used anywhere in the visible portion of the script.
sampwidth = wav_data.sampwidth
#normalization
# Divide every sample by max(abs(data)); presumably peak normalisation to
# [-1, 1] -- assumes single-channel data, TODO confirm.
norm_data = data/max(abs(data))#frames
# Split the normalised signal into frames; the code below indexes the result
# as a 2-D array with one frame per row (frames.shape[0], frames[i]).
frames = frames_crop(norm_data,sample_rate)#add window
# Window length in samples, passed as N to choose_windows.
win =160
windows = choose_windows(name ="Hamming",N = win)#parameters
# FFT length handed to np.fft.fft below; int(N/2) bins are kept per frame.
N =2048#NFFT
# Number of columns in the two filter-bank feature arrays.
M =40#filters number
# Number of columns in the DCT (cepstral) output array.
num_ceps =24#fft
# Pre-allocate one output row per frame for each feature type.
spe_freqs = np.zeros((frames.shape[0],int(N/2)))#spectrogram
fbank_feature = np.zeros((frames.shape[0],M))#Fbank
fbank_feature_2 = np.zeros((frames.shape[0],M))#Fbank second version
# NOTE(review): the trailing comment on the next line appears to have
# swallowed the loop header `for i in range(frames.shape[0]):` -- the lines
# that follow use `i` as a frame index. Confirm and restore the statement.
mfcc_dct = np.zeros((frames.shape[0],num_ceps))#dctfor i inrange(frames.shape[0]):
# Apply the window to frame i and take an N-point FFT.
frames_fft = np.fft.fft(windows * frames[i],N)
# Store log_data() of the magnitude of the first N/2 FFT bins as row i.
spe_freqs[i][:]= log_data(np.abs(frames_fft[:int(N/2)]))
filter_banks = mel_filters(sample_rate = sample_rate, NFFT = N, pow_frames = np.abs(frames_fft[:int(N/2