MCRA2
总体步骤:
跟踪最小值采用的“连续频谱最小值跟踪算法
过减技术的谱减法
Matlab代码
% mcra2 + Spectral Subtraction
% 2023.8.14 Author: Sfj
clc;clear;close all;
% %%%%%%%%%%%%%%%%- Read input-file %%%%%%%%%%%%%%
inputname= 'test_wav';
path= 'data/';
read_pcm= 0;
outpath= ['data/out0/'];
mkdir(outpath);
Channel= 1;
if (read_pcm== 1)
fileId= fopen([path,inputname,'.pcm'],'r');
x= fread(fileId, inf,'int16');
fclose(fileId);
for i= 1:Channel;
x_in(:,i)= x(i:Channel:end);
end
else
[x_in,fs]= audioread([path,inputname,'.wav'],'native');
x_in= x_in(:,Channel);
end
x= double(int16(x_in));
frame_size= floor(16 * fs / 1000) % 16ms,256
FFT_size= 2 * frame_size;
FFT_bins= frame_size + 1;
x_old= zeros(frame_size,Channel);
%win= hanning(FFT_size);
win = (sin(pi*(0.5:1:FFT_size-0.5)./FFT_size)).';
w=2;
b=hanning(2*w+1);
b=b/sum(b);
track_min = 0; % 选择搜索最小值的方法,滑动窗口找最小值
if track_min == 1
Nwin= 8;
Vwin= 15;
l_mod_lswitch=0;
Smin_sw = zeros(FFT_bins,Nwin);s
end
% Initination
freq_res= fs / FFT_size;
k_1khz= floor(1000 / freq_res);
k_3khz= floor(3000 / freq_res);
delay_val= [2*ones(k_1khz,Channel);2*ones(k_3khz-k_1khz,Channel);5*ones(FFT_bins-k_3khz,Channel)];
alpha_s_mc2= 0.8;
gamma_mc2= 0.998;
beta_mc2= 0.96;
alpha_p_mc2= 0.2;
alpha_d_mc2= 0.95;
beta= 0.002;
img= sqrt(-1);
S_old= zeros(FFT_bins,Channel);
Smin_old= zeros(FFT_bins,Channel);
phat= zeros(FFT_bins,Channel);
lambda_dav= zeros(FFT_bins,Channel);
x_frame_old= zeros(frame_size,Channel);
function a=berouti(SNR)
if SNR>=-5.0 & SNR<=20
a=4-SNR*3/20;
else
if SNR<-5.0
a=5;
end
if SNR>20
a=1;
end
end
endfunction
h = waitbar(0,'Running...');
set(h,'name','mcra2_spsub - 0%');
nb_frames= fix((size(x,1) - frame_size) / frame_size);
for frame_idx= 1:nb_frames
pos= frame_size * (frame_idx-1) + 1;
x_new= x(pos:pos+frame_size-1,:);
x_frame= [x_old;x_new];
x_old= x_new;
temp= fft((x_frame.*win));
Y= temp(1:FFT_bins);
Y_mag= abs(Y);
Ya2= Y_mag.*Y_mag;
Yf= conv(b,Ya2); % _mcra2_out3 add
Yf= Yf(w+1:FFT_bins+w);
% mcra2
% Continuous Spectral Minimum Tracking
S= alpha_s_mc2 * S_old + (1-alpha_s_mc2) * Yf;
Sminc= S;
Idx= find(Smin_old < S);
Sminc(Idx)= gamma_mc2.*Smin_old(Idx) + (1-gamma_mc2)/(1-beta_mc2).*(S(Idx)-beta_mc2.*S_old(Idx)); %连续谱找最小值
S_old= S;
Smin_old= Sminc;
if(track_min==1)
if frame_idx < Vwin
Smin = S;
SMact = S;
endif
Smin = min(Smin,S);
SMact = min(SMact,S);
end
Smin_final = Sminc; % 最终参与计算的最小值
% Calculate the probability of speech presence
p= zeros(FFT_bins,Channel);
Idx2= find(S>(Smin_final.*delay_val));
p(Idx2)= 1.0;
phat= alpha_p_mc2.*phat + (1-alpha_p_mc2).*p;
alpha_dt= alpha_d_mc2+(1-alpha_d_mc2)*phat;
lambda_dav= alpha_dt.*lambda_dav+(1-alpha_dt).*Ya2; % Estimated Noise Magnitude Spectrum
lambda_d=1.4685*lambda_dav; % _mcra2_out4 add
if(track_min==1)
if frame_idx < Vwin
l_mod_lswitch = Vwin-1;
end
l_mod_lswitch=l_mod_lswitch+1;
if l_mod_lswitch==Vwin
Smin_sw= [Smin_sw(:,2:end),SMact];
Smin = min(Smin_sw,[],2);
SMact = S;
endif
end
% Spectral Subtraction
noise_mag= sqrt(lambda_d);
phase= angle(Y);%atan2(imag(Y),real(Y)); % 弧度
SNRseg= 10*log10(norm(Y_mag,2)^2/norm(noise_mag,2)^2);
alpha_sub= berouti(SNRseg);
Xpower= Ya2-alpha_sub.*lambda_d;
z= find(Xpower < beta*lambda_d);
Xpower(z)= beta*lambda_d(z);
% Xpower = max(Ya2-alpha_sub.*lambda_d, beta*lambda_d);
X_mag= sqrt(Xpower);
X= X_mag.*(cos(phase)+ img*sin(phase));
% IFFT and OLA
X(FFT_bins+1:FFT_size)=conj(X(FFT_bins-1:-1:2));
x_frame_out= win.*real(ifft(X));
x_out_frame= x_frame_out(1:frame_size,:)+x_frame_old;
x_frame_old= x_frame_out(frame_size+1:end,:);
NS_OUT(pos:pos+frame_size-1)= x_out_frame;
waitbar(frame_idx / nb_frames,h);
set(h,'name',[sprintf('%2.1f',frame_idx / nb_frames*100)]);
end
close(h);
audiowrite([outpath,inputname,'_mcra2_out5','.wav'],NS_OUT/32767,fs);
参考文献
【Speech_enhancement_theory_and_practice ——Loizou】