· 如何实现数据增强(Data Augmentation)?
训练语音识别模型需要大量的数据样本,而实验中能够收集到的样本数量有限。此时可以采用数据增强的方式扩充数据集,同时不改变数据中原有的信息(例如样本对应的标签)。
音频数据常见的数据增强方式有:加噪(Add Noise)、音高平移(Pitch Shifting)和时间拉伸(Time Stretching)。
加噪的Python 代码如下:
#coding=gbk
import os
import librosa
import numpy as np
def Add_noise(data, p):
    """Return ``data`` with scaled Gaussian white noise added.

    Args:
        data: Audio samples as a NumPy array (or array-like) of any shape,
            e.g. ``(samples,)`` or ``(channels, samples)``.
        p: Noise amplitude. Standard-normal noise (mean 0, std 1) is
            multiplied by this factor before being added to ``data``.

    Returns:
        A new array equal to ``data + p * noise`` with the same shape as
        ``data``. The input is not modified.
    """
    data = np.asarray(data)
    # Draw one noise value per element rather than ``len(data)`` values, so
    # multi-dimensional (multi-channel) input does not fail to broadcast.
    wn = np.random.normal(0, 1, data.shape)
    data_noise = p * wn + data
    return data_noise
# Batch script: add Gaussian white noise to every .wav file found in `path`
# and save each result as "<original name>-noise.wav" under `save_path`.

# librosa.output.write_wav was removed in librosa 0.8; scipy (already a
# librosa dependency) provides the WAV writer used here instead.
from scipy.io import wavfile

path = 'E:/xxx/xxx/'   # input directory (placeholder)
save_path = "E:xxxxxx"  # output directory (placeholder)
p = 0.02  # noise amplitude: adds 0.02 * Gaussian white noise

files = [path + f for f in os.listdir(path) if f.endswith('.wav')]
for FileName in files:
    # Load the clip to augment; sr=None keeps the file's native sample rate.
    print("add noise File Name is ", FileName)
    y, sr = librosa.load(FileName, sr=None)
    t = librosa.get_duration(y=y, sr=sr)
    print(t)

    # Derive the output name from the input file name itself instead of a
    # fixed character offset, which only worked for one specific path length.
    stem = os.path.splitext(os.path.basename(FileName))[0]
    save_name = stem + '-noise' + '.wav'
    print(save_name)
    # os.path.join inserts the directory separator that plain string
    # concatenation would omit.
    path_noise = os.path.join(save_path, save_name)
    print(path_noise)

    # Add the noise to augment the signal.
    data_noise = Add_noise(y, p)

    # Save the audio. Adding float64 noise promotes the samples to float64;
    # cast back to float32 so a standard 32-bit float WAV is written.
    wavfile.write(path_noise, int(sr), data_noise.astype(np.float32))
print('run over!')
Shifting 和 Stretching的代码如下:
#coding=gbk
import os
import librosa
import numpy as np
import random
import matplotlib.pyplot as plt
import librosa.display
%matplotlib inline
def Shift_Wave(y, sr):
    """Pitch-shift ``y`` by a random number of steps and plot the result.

    Args:
        y: Audio time series to transform.
        sr: Sampling rate of ``y``.

    Returns:
        The pitch-shifted audio time series.

    Side effects:
        Draws the transformed waveform into subplot 1 of a 5-row matplotlib
        grid on the current figure.
    """
    # Random shift between -10 and +10 steps (both ends inclusive; 0 means
    # no shift).
    n_steps = random.randint(-10, 10)
    # Pass sr/n_steps by keyword: they are keyword-only in librosa >= 0.10,
    # and keywords are accepted by older releases as well.
    y_ps = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)  # key parameter: n_steps
    plt.subplot(511)
    # librosa 0.10 removed display.waveplot in favour of display.waveshow;
    # use whichever one the installed version provides.
    plot_wave = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
    plot_wave(y_ps, sr=sr)
    plt.title('Pitch Shift transformed waveform')
    return y_ps
def Stretch_Wave(y, sr=None):
    """Time-stretch ``y`` by a random rate and plot the result.

    Args:
        y: Audio time series to transform.
        sr: Sampling rate of ``y``; used only for the time axis of the plot.
            When omitted, a module-level ``sr`` variable is used if one
            exists (the behaviour older callers relied on), otherwise
            22050 Hz, librosa's default sampling rate.

    Returns:
        The time-stretched audio time series.

    Side effects:
        Draws the transformed waveform into subplot 3 of a 5-row matplotlib
        grid on the current figure.
    """
    if sr is None:
        # Backward compatibility: this function used to read ``sr`` from the
        # module globals without declaring it as a parameter.
        sr = globals().get("sr", 22050)
    # Random stretch factor in [0.8, 1.2].
    rate = random.uniform(0.8, 1.2)
    # Pass rate by keyword: it is keyword-only in librosa >= 0.10, and the
    # keyword is accepted by older releases as well.
    y_ts = librosa.effects.time_stretch(y, rate=rate)  # key parameter: rate
    plt.subplot(513)
    # librosa 0.10 removed display.waveplot in favour of display.waveshow;
    # use whichever one the installed version provides.
    plot_wave = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot
    plot_wave(y_ts, sr=sr)
    plt.title('Time Stretch transformed waveform')
    return y_ts
# Batch script: for every .wav file found in `path`, create a pitch-shifted
# copy ("<name>-shift.wav") and a time-stretched copy ("<name>-stretch.wav").

# librosa.output.write_wav was removed in librosa 0.8; scipy (already a
# librosa dependency) provides the WAV writer used here instead.
from scipy.io import wavfile

path = 'E:/xxx/xxx/'            # input directory (placeholder)
save_path_shift = "E:xxxxxx"    # output directory for shifted files (placeholder)
save_path_stretch = "E:xxxxxx"  # output directory for stretched files (placeholder)

# librosa 0.10 removed display.waveplot in favour of display.waveshow; use
# whichever one the installed version provides.
plot_wave = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot

files = [path + f for f in os.listdir(path) if f.endswith('.wav')]
for FileName in files:
    # Load the clip to augment; sr=None keeps the file's native sample rate.
    print("Shift Wave File Name is ", FileName)
    y, sr = librosa.load(FileName, sr=None)
    # Plot the original waveform in the bottom row of the 5-row grid.
    plt.subplot(515)
    plot_wave(y, sr=sr)

    # Derive the output names from the input file name itself instead of a
    # fixed character offset, which only worked for one specific path length.
    stem = os.path.splitext(os.path.basename(FileName))[0]

    # Output name and path for the pitch-shifted copy. os.path.join inserts
    # the directory separator that plain string concatenation would omit.
    save_name = stem + '-shift' + '.wav'
    print(save_name)
    path_shift = os.path.join(save_path_shift, save_name)
    print(path_shift)
    # Apply the pitch-shift augmentation.
    data_shift = Shift_Wave(y, sr)

    # Output name and path for the time-stretched copy.
    save_name = stem + '-stretch' + '.wav'
    print(save_name)
    path_stretch = os.path.join(save_path_stretch, save_name)
    print(path_stretch)
    # Apply the time-stretch augmentation.
    data_stretch = Stretch_Wave(y)

    # Save the audio as 32-bit float WAV files.
    wavfile.write(path_shift, int(sr), np.asarray(data_shift, dtype=np.float32))
    wavfile.write(path_stretch, int(sr), np.asarray(data_stretch, dtype=np.float32))
print('run over!')