有纯净语音,有噪声,怎样合成不同信噪比下的混合语音呢?!
function mixwav_IBM(S,db,nois)
numChan = 64; %通道数
FS = 8000; %数据的采样频率
gamma = gen_gammaton(FS,numChan);
ft_env = fir1(1024,[70,400]/FS*2,'bandpass');
[v,fs] = wavread(['babble.wav']);
v = resample(v,FS,fs);
noi.wav = repmat(v,fix(FS*10/length(v))+1,1);
noi.wav = noi.wav(1:FS*10);
[~,gf] = fgammaton(noi.wav',gamma,FS,numChan);
noi.cch = cochleagram(gf');
[w,fsw] = wavread(['F50A1.WAV']);
w = resample(w,FS,fsw);
[~,gf] = fgammaton(w',gamma,FS,numChan);
scch = cochleagram(gf');
v = noi.wav;
nLen = min(length(w),length(v));
w = w(1:nLen); %w是纯净的语音
v = v(1:nLen); %v是噪声
db = 0;%混合语音的信噪比是0db
[m,v] = wavmix(w,v,db);%m是混合语音的波形
winsize = 256;
shiftsize = 80;
window = hamming(winsize);
winc_w = getframe(w,winsize,shiftsize,window);%纯净语音分帧
winc_v = getframe(v,winsize,shiftsize,window); %噪声分帧
winc_m = getframe(m,winsize,shiftsize,window); %混合语音分帧
w_fft = abs(fft(winc_w));%纯净语音的fft
v_fft = abs(fft(winc_v));%噪声语音的fft
m_fft = abs(fft(winc_m));%混合语音的fft
amp = zeros(size(w_fft));%掩码的size和原始频谱图的size一样大
amp(w_fft>v_fft) = 1;
wavwrite(m,FS,'F50A1_babble.WAV');
figure;imagesc(w_fft);title('纯净语音')
figure;imagesc(v_fft);title('噪声')
figure;imagesc(m_fft);title('混合语音')
figure;imagesc(amp);title('IBM')
figure;imagesc(m_fft.*amp);title('IBM盖在混合语音语谱图上得到降噪之后的语音语谱')
close(figure);
%如果正确,那么将IBM盖在混合语音的语谱图上 则会得到和纯净语音一样的语谱图
%纯净语音的语谱图与IBM之后的语谱还是不太一样
end
function [m,v] = wavmix(w,v,db)
amp = sqrt(sum(w.^2)/sum(v.^2)/10^(db/10));
m = w + v*amp;
v = v*amp;
end
function winc = getframe(wav,winsize,shiftsize,window)
slen = length(wav);
numframe = ceil(slen/shiftsize);
winc = zeros(winsize,numframe);
for n = 1:numframe
st = (n-1)*shiftsize;
ed = min(st+winsize,slen);
winc(1:ed-st,n) = wav(st+1:ed);
winc(:,n) = winc(:,n).*window;
end
end
function gt = gen_gammaton(sampFreq, numChannel)
% Create gammatone filterbank
% Written by Yang Shao, and adapted by Xiaojia Zhao in Oct'11
if ~exist('sampFreq', 'var')
sampFreq = 8000; % sampling frequency, default is 8000
end
if ~exist('numChannel', 'var')
numChannel = 64; % number of channels, default is 64
end
stepBound=200000;
filterOrder=4; % filter order
phase(1:numChannel)=zeros(numChannel,1); % original phase
erb_b=hz2erb([50,sampFreq/2]); % upper and lower bound of ERB
erb=[erb_b(1):diff(erb_b)/(numChannel-1):erb_b(2)]; % ERB segment
cf=erb2hz(erb); % center frequency
b=1.019*24.7*(4.37*cf/1000+1); % rate of decay
gt=zeros(numChannel,stepBound);
tmp_t=[1:stepBound]/sampFreq;
for i=1:numChannel
%gain=10^((loudness(cf(i))-60)/20)/3*(2*pi*b(i)/sampFreq).^4;
gain = (2*pi*b(i)/sampFreq).^4/3;
gt(i,:)=gain*sampFreq^3*tmp_t.^(filterOrder-1).*exp(-2*pi*b(i)*tmp_t).*cos(2*pi*cf(i)*tmp_t+phase(i));
end
end
function erb=hz2erb(hz)
% Convert normal frequency scale in hz to ERB-rate scale.
% Units are number of Hz and number of ERBs.
% ERB stands for Equivalent Rectangular Bandwidth.
% Written by ZZ Jin, and adapted by DLW in Jan'07
erb=21.4*log10(4.37e-3*hz+1);
end
function hz=erb2hz(erb)
% Convert ERB-rate scale to normal frequency scale.
% Units are number of ERBs and number of Hz.
% ERB stands for Equivalent Rectangular Bandwidth.
% Written by ZZ Jin, and adapted by DLW in Jan'07
hz=(10.^(erb/21.4)-1)/4.37e-3;
end
function [f, tmp]=fgammaton(sig, gamma, sampFreq, numChannel)
% Filter input signal with a gammatone filterbank and generate GF features
% sig: input signal
% gamma: gammtone filter impulse responses
% sampFreq: sampling frequency
% numChannel: number of channels
% Written by Yang Shao, and adapted by Xiaojia Zhao in Oct'11
[ignore,stepBound]=size(sig);
d = 2^ceil(log2(length(sig)));
tmp =ifft((fft((ones(numChannel,1)*sig)',d).*fft(gamma(:,1:stepBound)',d)));
tmp = tmp(1:stepBound,:);
f=(abs(resample(abs(tmp),100,sampFreq)')).^(1/3);
end
function a = cochleagram(r, winLength,winShift)
% Generate a cochleagram from responses of a Gammatone filterbank.
% It gives the log energy of T-F units
% The first variable is required.
% winLength: window (frame) length in samples
% Written by ZZ Jin, and adapted by DLW in Jan'07
if nargin < 2
winLength = 320; % default window length in sample points which is 20 ms for 16 KHz sampling frequency
end
[numChan,sigLength] = size(r); % number of channels and input signal length
if nargin < 3
winShift = winLength/2; % frame shift (default is half frame)
end
increment = winLength/winShift; % special treatment for first increment-1 frames
M = floor(sigLength/winShift); % number of time frames
coswin = (1 + cos(2*pi*(0:winLength-1)/winLength - pi))/2; % calculate a raised cosine window
% calculate energy for each frame in each channel
a = zeros(numChan,M);
for m = 1:M
for i = 1:numChan
if m < increment % shorter frame lengths for beginning frames
a(i,m) = r(i,1:m*winShift)*r(i,1:m*winShift)';
else
startpoint = (m-increment)*winShift;
a(i,m) = (r(i,startpoint+1:startpoint+winLength)).*coswin*((r(i,startpoint+1:startpoint+winLength)).*coswin)';
% a(i,m) = sum(abs((r(i,startpoint+1:startpoint+winLength))).*coswin);%*((r(i,startpoint+1:startpoint+winLength)))';
end
end
end
end
经过掩码掩盖之后,那么怎样从频谱图合成降噪之后的语音呢?!
function wav = syn_mask(n,tt)
% n 噪声谱,n = stft(wav)
% tt 为估计的mask,语音部分为1,其余为0,大小与n相同
if nargin < 2
tt = ones(size(n));
end
if size(tt,1)~=129
tt = tt';
end
if size(n,1)~=129
n = n';
end
wav = istft(n.*tt);
% ang = angle(n);
% wav = istft(exp(tt').*(cos(ang)+1i*sin(ang)))';
function wav = syn_spec(n,tt)
% n 噪声谱,n = stft(wav)
% tt 为估计的幅度谱,大小与n相同
if size(tt,1)==129
tt = tt';
end
%wav = istft(n.*tt);
ang = angle(n);
wav = istft((tt').*(cos(ang')+1i*sin(ang')))';`这里写代码片`
function d = stft(x)
s = length(x);
f = 256;
win=[0.0800
0.0801
0.0806
0.0813
0.0822
0.0835
0.0850
0.0868
0.0889
0.0913
0.0939
0.0968
0.1000
0.1034
0.1071
0.1111
0.1153
0.1198
0.1245
0.1295
0.1347
0.1402
0.1459
0.1519
0.1581
0.1645
0.1712
0.1781
0.1852
0.1925
0.2001
0.2078
0.2157
0.2239
0.2322
0.2407
0.2494
0.2583
0.2673
0.2765
0.2859
0.2954
0.3051
0.3149
0.3249
0.3350
0.3452
0.3555
0.3659
0.3765
0.3871
0.3979
0.4087
0.4196
0.4305
0.4416
0.4527
0.4638
0.4750
0.4863
0.4976
0.5089
0.5202
0.5315
0.5428
0.5542
0.5655
0.5768
0.5881
0.5993
0.6106
0.6217
0.6329
0.6439
0.6549
0.6659
0.6767
0.6875
0.6982
0.7088
0.7193
0.7297
0.7400
0.7501
0.7601
0.7700
0.7797
0.7893
0.7988
0.8081
0.8172
0.8262
0.8350
0.8436
0.8520
0.8602
0.8683
0.8761
0.8837
0.8912
0.8984
0.9054
0.9121
0.9187
0.9250
0.9311
0.9369
0.9426
0.9479
0.9530
0.9579
0.9625
0.9669
0.9710
0.9748
0.9784
0.9817
0.9847
0.9875
0.9899
0.9922
0.9941
0.9958
0.9972
0.9983
0.9991
0.9997
1.0000
1.0000
0.9997
0.9991
0.9983
0.9972
0.9958
0.9941
0.9922
0.9899
0.9875
0.9847
0.9817
0.9784
0.9748
0.9710
0.9669
0.9625
0.9579
0.9530
0.9479
0.9426
0.9369
0.9311
0.9250
0.9187
0.9121
0.9054
0.8984
0.8912
0.8837
0.8761
0.8683
0.8602
0.8520
0.8436
0.8350
0.8262
0.8172
0.8081
0.7988
0.7893
0.7797
0.7700
0.7601
0.7501
0.7400
0.7297
0.7193
0.7088
0.6982
0.6875
0.6767
0.6659
0.6549
0.6439
0.6329
0.6217
0.6106
0.5993
0.5881
0.5768
0.5655
0.5542
0.5428
0.5315
0.5202
0.5089
0.4976
0.4863
0.4750
0.4638
0.4527
0.4416
0.4305
0.4196
0.4087
0.3979
0.3871
0.3765
0.3659
0.3555
0.3452
0.3350
0.3249
0.3149
0.3051
0.2954
0.2859
0.2765
0.2673
0.2583
0.2494
0.2407
0.2322
0.2239
0.2157
0.2078
0.2001
0.1925
0.1852
0.1781
0.1712
0.1645
0.1581
0.1519
0.1459
0.1402
0.1347
0.1295
0.1245
0.1198
0.1153
0.1111
0.1071
0.1034
0.1000
0.0968
0.0939
0.0913
0.0889
0.0868
0.0850
0.0835
0.0822
0.0813
0.0806
0.0801
0.0800];
c = 1;
h = 80;
% pre-allocate output array
d = complex(zeros((1+f/2),1+fix((s-f)/h)));
for b = 0:h:(s-f)
u = win.*x((b+1):(b+f));
t = fft(u);
d(:,c) = t(1:(1+f/2))';
c = c+1;
end;
function x = istft(d)
% X = istft(D, F, W, H) Inverse short-time Fourier transform.
% Performs overlap-add resynthesis from the short-time Fourier transform
% data in D. Each column of D is taken as the result of an F-point
% fft; each successive frame was offset by H points. Data is
% hamm-windowed at W pts.
% W = 0 gives a rectangular window; W as a vector uses that as window.
% dpwe 1994may24. Uses built-in 'ifft' etc.
% $Header: /homes/dpwe/public_html/resources/matlab/pvoc/RCS/istft.m,v 1.4 2009/01/07 04:20:00 dpwe Exp $
ftsize = 256;
h = 80;
s = size(d);
cols = s(2);
xlen = ftsize + (cols-1)*h;
x = zeros(1,xlen);
win=[0.0800
0.0801
0.0806
0.0813
0.0822
0.0835
0.0850
0.0868
0.0889
0.0913
0.0939
0.0968
0.1000
0.1034
0.1071
0.1111
0.1153
0.1198
0.1245
0.1295
0.1347
0.1402
0.1459
0.1519
0.1581
0.1645
0.1712
0.1781
0.1852
0.1925
0.2001
0.2078
0.2157
0.2239
0.2322
0.2407
0.2494
0.2583
0.2673
0.2765
0.2859
0.2954
0.3051
0.3149
0.3249
0.3350
0.3452
0.3555
0.3659
0.3765
0.3871
0.3979
0.4087
0.4196
0.4305
0.4416
0.4527
0.4638
0.4750
0.4863
0.4976
0.5089
0.5202
0.5315
0.5428
0.5542
0.5655
0.5768
0.5881
0.5993
0.6106
0.6217
0.6329
0.6439
0.6549
0.6659
0.6767
0.6875
0.6982
0.7088
0.7193
0.7297
0.7400
0.7501
0.7601
0.7700
0.7797
0.7893
0.7988
0.8081
0.8172
0.8262
0.8350
0.8436
0.8520
0.8602
0.8683
0.8761
0.8837
0.8912
0.8984
0.9054
0.9121
0.9187
0.9250
0.9311
0.9369
0.9426
0.9479
0.9530
0.9579
0.9625
0.9669
0.9710
0.9748
0.9784
0.9817
0.9847
0.9875
0.9899
0.9922
0.9941
0.9958
0.9972
0.9983
0.9991
0.9997
1.0000
1.0000
0.9997
0.9991
0.9983
0.9972
0.9958
0.9941
0.9922
0.9899
0.9875
0.9847
0.9817
0.9784
0.9748
0.9710
0.9669
0.9625
0.9579
0.9530
0.9479
0.9426
0.9369
0.9311
0.9250
0.9187
0.9121
0.9054
0.8984
0.8912
0.8837
0.8761
0.8683
0.8602
0.8520
0.8436
0.8350
0.8262
0.8172
0.8081
0.7988
0.7893
0.7797
0.7700
0.7601
0.7501
0.7400
0.7297
0.7193
0.7088
0.6982
0.6875
0.6767
0.6659
0.6549
0.6439
0.6329
0.6217
0.6106
0.5993
0.5881
0.5768
0.5655
0.5542
0.5428
0.5315
0.5202
0.5089
0.4976
0.4863
0.4750
0.4638
0.4527
0.4416
0.4305
0.4196
0.4087
0.3979
0.3871
0.3765
0.3659
0.3555
0.3452
0.3350
0.3249
0.3149
0.3051
0.2954
0.2859
0.2765
0.2673
0.2583
0.2494
0.2407
0.2322
0.2239
0.2157
0.2078
0.2001
0.1925
0.1852
0.1781
0.1712
0.1645
0.1581
0.1519
0.1459
0.1402
0.1347
0.1295
0.1245
0.1198
0.1153
0.1111
0.1071
0.1034
0.1000
0.0968
0.0939
0.0913
0.0889
0.0868
0.0850
0.0835
0.0822
0.0813
0.0806
0.0801
0.0800]';
for b = 0:h:(h*(cols-1))
ft = d(:,1+b/h)';
ft = [ft, conj(ft([((ftsize/2)):-1:2]))];
px = real(ifft(ft));
x((b+1):(b+ftsize)) = x((b+1):(b+ftsize))+px.*win;
end;