合成混合语音(纯净语音+噪声)和IBM

有纯净语音,有噪声,怎样合成不同信噪比下的混合语音呢?!

function mixwav_IBM(S,db,nois)

    numChan = 64; %通道数
    FS = 8000;    %数据的采样频率
    gamma = gen_gammaton(FS,numChan);
    ft_env = fir1(1024,[70,400]/FS*2,'bandpass');
    [v,fs] = wavread(['babble.wav']);
    v = resample(v,FS,fs);                
    noi.wav = repmat(v,fix(FS*10/length(v))+1,1);
    noi.wav = noi.wav(1:FS*10);
    [~,gf] = fgammaton(noi.wav',gamma,FS,numChan);
    noi.cch = cochleagram(gf');

    [w,fsw] = wavread(['F50A1.WAV']); 
     w = resample(w,FS,fsw); 
    [~,gf] = fgammaton(w',gamma,FS,numChan);
    scch = cochleagram(gf');
    v = noi.wav;

    nLen = min(length(w),length(v));
    w = w(1:nLen); %w是纯净的语音
    v = v(1:nLen); %v是噪声
    db = 0;%混合语音的信噪比是0db
    [m,v] = wavmix(w,v,db);%m是混合语音的波形

     winsize = 256;  
     shiftsize = 80;  
     window = hamming(winsize);  
     winc_w = getframe(w,winsize,shiftsize,window);%纯净语音分帧  
     winc_v = getframe(v,winsize,shiftsize,window); %噪声分帧
     winc_m = getframe(m,winsize,shiftsize,window); %混合语音分帧
     w_fft = abs(fft(winc_w));%纯净语音的fft
     v_fft = abs(fft(winc_v));%噪声语音的fft
     m_fft = abs(fft(winc_m));%混合语音的fft
     amp = zeros(size(w_fft));%掩码的size和原始频谱图的size一样大
     amp(w_fft>v_fft) = 1;

     wavwrite(m,FS,'F50A1_babble.WAV');

     figure;imagesc(w_fft);title('纯净语音')
     figure;imagesc(v_fft);title('噪声')
     figure;imagesc(m_fft);title('混合语音')
     figure;imagesc(amp);title('IBM')
     figure;imagesc(m_fft.*amp);title('IBM盖在混合语音语谱图上得到降噪之后的语音语谱')
     close(figure);
     %如果正确,那么将IBM盖在混合语音的语谱图上 则会得到和纯净语音一样的语谱图
     %纯净语音的语谱图与IBM之后的语谱还是不太一样
end      
function [m,v] = wavmix(w,v,db)
   amp = sqrt(sum(w.^2)/sum(v.^2)/10^(db/10));
   m = w + v*amp;
   v = v*amp;
end
  function winc = getframe(wav,winsize,shiftsize,window)  
    slen = length(wav);  
    numframe = ceil(slen/shiftsize);  
    winc = zeros(winsize,numframe);  
    for n = 1:numframe  
        st = (n-1)*shiftsize;  
        ed = min(st+winsize,slen);  
        winc(1:ed-st,n) = wav(st+1:ed);  
        winc(:,n) = winc(:,n).*window;          
    end  
end
function gt = gen_gammaton(sampFreq, numChannel)
    % Create gammatone filterbank
    % Written by Yang Shao, and adapted by Xiaojia Zhao in Oct'11

    if ~exist('sampFreq', 'var')
        sampFreq = 8000; % sampling frequency, default is 8000
    end


    if  ~exist('numChannel', 'var')
        numChannel = 64; % number of channels, default is 64
    end


    stepBound=200000; 

    filterOrder=4;     % filter order

    phase(1:numChannel)=zeros(numChannel,1);        % original phase
    erb_b=hz2erb([50,sampFreq/2]);       % upper and lower bound of ERB
    erb=[erb_b(1):diff(erb_b)/(numChannel-1):erb_b(2)];     % ERB segment
    cf=erb2hz(erb);       % center frequency
    b=1.019*24.7*(4.37*cf/1000+1);       % rate of decay

    gt=zeros(numChannel,stepBound);

    tmp_t=[1:stepBound]/sampFreq;

    for i=1:numChannel

        %gain=10^((loudness(cf(i))-60)/20)/3*(2*pi*b(i)/sampFreq).^4;
        gain = (2*pi*b(i)/sampFreq).^4/3;
        gt(i,:)=gain*sampFreq^3*tmp_t.^(filterOrder-1).*exp(-2*pi*b(i)*tmp_t).*cos(2*pi*cf(i)*tmp_t+phase(i));
    end
end
function erb=hz2erb(hz)
    % Convert normal frequency scale in hz to ERB-rate scale.
    % Units are number of Hz and number of ERBs.
    % ERB stands for Equivalent Rectangular Bandwidth.
    % Written by ZZ Jin, and adapted by DLW in Jan'07

    erb=21.4*log10(4.37e-3*hz+1);

end
function hz=erb2hz(erb)
    % Convert ERB-rate scale to normal frequency scale.
    % Units are number of ERBs and number of Hz.
    % ERB stands for Equivalent Rectangular Bandwidth.
    % Written by ZZ Jin, and adapted by DLW in Jan'07

    hz=(10.^(erb/21.4)-1)/4.37e-3;
end
function [f, tmp]=fgammaton(sig, gamma, sampFreq, numChannel)
    % Filter input signal with a gammatone filterbank and generate GF features
    % sig: input signal
    % gamma: gammtone filter impulse responses
    % sampFreq: sampling frequency
    % numChannel: number of channels
    % Written by Yang Shao, and adapted by Xiaojia Zhao in Oct'11

    [ignore,stepBound]=size(sig);

    d = 2^ceil(log2(length(sig)));

    tmp =ifft((fft((ones(numChannel,1)*sig)',d).*fft(gamma(:,1:stepBound)',d)));
    tmp = tmp(1:stepBound,:);
    f=(abs(resample(abs(tmp),100,sampFreq)')).^(1/3);
end
function a = cochleagram(r, winLength,winShift)
    % Generate a cochleagram from responses of a Gammatone filterbank.
    % It gives the log energy of T-F units
    % The first variable is required.
    % winLength: window (frame) length in samples
    % Written by ZZ Jin, and adapted by DLW in Jan'07

    if nargin < 2
        winLength = 320;      % default window length in sample points which is 20 ms for 16 KHz sampling frequency
    end

    [numChan,sigLength] = size(r);     % number of channels and input signal length
    if nargin < 3 
        winShift = winLength/2;        % frame shift (default is half frame)
    end   
    increment = winLength/winShift;    % special treatment for first increment-1 frames

    M = floor(sigLength/winShift);     % number of time frames

    coswin = (1 + cos(2*pi*(0:winLength-1)/winLength - pi))/2;  % calculate a raised cosine window

    % calculate energy for each frame in each channel
    a = zeros(numChan,M);
    for m = 1:M      
        for i = 1:numChan
            if m < increment        % shorter frame lengths for beginning frames
                a(i,m) = r(i,1:m*winShift)*r(i,1:m*winShift)';
            else
                startpoint = (m-increment)*winShift;
                a(i,m) = (r(i,startpoint+1:startpoint+winLength)).*coswin*((r(i,startpoint+1:startpoint+winLength)).*coswin)';
    %             a(i,m) = sum(abs((r(i,startpoint+1:startpoint+winLength))).*coswin);%*((r(i,startpoint+1:startpoint+winLength)))';
            end
        end
    end
end

这里写图片描述

经过掩码掩盖之后,那么怎样从频谱图合成降噪之后的语音呢?!

function wav = syn_mask(n,tt)
% n 噪声谱,n = stft(wav)
% tt 为估计的mask,语音部分为1,其余为0,大小与n相同
if nargin < 2
    tt = ones(size(n));
end
if size(tt,1)~=129
    tt = tt';
end
if size(n,1)~=129
    n = n';
end
wav = istft(n.*tt);
% ang = angle(n);
% wav = istft(exp(tt').*(cos(ang)+1i*sin(ang)))';
function wav = syn_spec(n,tt)
% n 噪声谱,n = stft(wav)
% tt 为估计的幅度谱,大小与n相同
if size(tt,1)==129
    tt = tt';
end
%wav = istft(n.*tt);
ang = angle(n);
wav = istft((tt').*(cos(ang')+1i*sin(ang')))';`这里写代码片`
function d = stft(x)
s = length(x);
f = 256;

win=[0.0800
0.0801
0.0806
0.0813
0.0822
0.0835
0.0850
0.0868
0.0889
0.0913
0.0939
0.0968
0.1000
0.1034
0.1071
0.1111
0.1153
0.1198
0.1245
0.1295
0.1347
0.1402
0.1459
0.1519
0.1581
0.1645
0.1712
0.1781
0.1852
0.1925
0.2001
0.2078
0.2157
0.2239
0.2322
0.2407
0.2494
0.2583
0.2673
0.2765
0.2859
0.2954
0.3051
0.3149
0.3249
0.3350
0.3452
0.3555
0.3659
0.3765
0.3871
0.3979
0.4087
0.4196
0.4305
0.4416
0.4527
0.4638
0.4750
0.4863
0.4976
0.5089
0.5202
0.5315
0.5428
0.5542
0.5655
0.5768
0.5881
0.5993
0.6106
0.6217
0.6329
0.6439
0.6549
0.6659
0.6767
0.6875
0.6982
0.7088
0.7193
0.7297
0.7400
0.7501
0.7601
0.7700
0.7797
0.7893
0.7988
0.8081
0.8172
0.8262
0.8350
0.8436
0.8520
0.8602
0.8683
0.8761
0.8837
0.8912
0.8984
0.9054
0.9121
0.9187
0.9250
0.9311
0.9369
0.9426
0.9479
0.9530
0.9579
0.9625
0.9669
0.9710
0.9748
0.9784
0.9817
0.9847
0.9875
0.9899
0.9922
0.9941
0.9958
0.9972
0.9983
0.9991
0.9997
1.0000
1.0000
0.9997
0.9991
0.9983
0.9972
0.9958
0.9941
0.9922
0.9899
0.9875
0.9847
0.9817
0.9784
0.9748
0.9710
0.9669
0.9625
0.9579
0.9530
0.9479
0.9426
0.9369
0.9311
0.9250
0.9187
0.9121
0.9054
0.8984
0.8912
0.8837
0.8761
0.8683
0.8602
0.8520
0.8436
0.8350
0.8262
0.8172
0.8081
0.7988
0.7893
0.7797
0.7700
0.7601
0.7501
0.7400
0.7297
0.7193
0.7088
0.6982
0.6875
0.6767
0.6659
0.6549
0.6439
0.6329
0.6217
0.6106
0.5993
0.5881
0.5768
0.5655
0.5542
0.5428
0.5315
0.5202
0.5089
0.4976
0.4863
0.4750
0.4638
0.4527
0.4416
0.4305
0.4196
0.4087
0.3979
0.3871
0.3765
0.3659
0.3555
0.3452
0.3350
0.3249
0.3149
0.3051
0.2954
0.2859
0.2765
0.2673
0.2583
0.2494
0.2407
0.2322
0.2239
0.2157
0.2078
0.2001
0.1925
0.1852
0.1781
0.1712
0.1645
0.1581
0.1519
0.1459
0.1402
0.1347
0.1295
0.1245
0.1198
0.1153
0.1111
0.1071
0.1034
0.1000
0.0968
0.0939
0.0913
0.0889
0.0868
0.0850
0.0835
0.0822
0.0813
0.0806
0.0801
0.0800];

c = 1;
h = 80;

% pre-allocate output array
d = complex(zeros((1+f/2),1+fix((s-f)/h)));

for b = 0:h:(s-f)
  u = win.*x((b+1):(b+f));
  t = fft(u);
  d(:,c) = t(1:(1+f/2))';
  c = c+1;
end;
function x = istft(d)
% X = istft(D, F, W, H)                   Inverse short-time Fourier transform.
%   Performs overlap-add resynthesis from the short-time Fourier transform 
%   data in D.  Each column of D is taken as the result of an F-point 
%   fft; each successive frame was offset by H points. Data is 
%   hamm-windowed at W pts.  
%       W = 0 gives a rectangular window; W as a vector uses that as window.
% dpwe 1994may24.  Uses built-in 'ifft' etc.
% $Header: /homes/dpwe/public_html/resources/matlab/pvoc/RCS/istft.m,v 1.4 2009/01/07 04:20:00 dpwe Exp $

ftsize = 256;
h = 80;
s = size(d);

cols = s(2);
xlen = ftsize + (cols-1)*h;
x = zeros(1,xlen);

win=[0.0800
0.0801
0.0806
0.0813
0.0822
0.0835
0.0850
0.0868
0.0889
0.0913
0.0939
0.0968
0.1000
0.1034
0.1071
0.1111
0.1153
0.1198
0.1245
0.1295
0.1347
0.1402
0.1459
0.1519
0.1581
0.1645
0.1712
0.1781
0.1852
0.1925
0.2001
0.2078
0.2157
0.2239
0.2322
0.2407
0.2494
0.2583
0.2673
0.2765
0.2859
0.2954
0.3051
0.3149
0.3249
0.3350
0.3452
0.3555
0.3659
0.3765
0.3871
0.3979
0.4087
0.4196
0.4305
0.4416
0.4527
0.4638
0.4750
0.4863
0.4976
0.5089
0.5202
0.5315
0.5428
0.5542
0.5655
0.5768
0.5881
0.5993
0.6106
0.6217
0.6329
0.6439
0.6549
0.6659
0.6767
0.6875
0.6982
0.7088
0.7193
0.7297
0.7400
0.7501
0.7601
0.7700
0.7797
0.7893
0.7988
0.8081
0.8172
0.8262
0.8350
0.8436
0.8520
0.8602
0.8683
0.8761
0.8837
0.8912
0.8984
0.9054
0.9121
0.9187
0.9250
0.9311
0.9369
0.9426
0.9479
0.9530
0.9579
0.9625
0.9669
0.9710
0.9748
0.9784
0.9817
0.9847
0.9875
0.9899
0.9922
0.9941
0.9958
0.9972
0.9983
0.9991
0.9997
1.0000
1.0000
0.9997
0.9991
0.9983
0.9972
0.9958
0.9941
0.9922
0.9899
0.9875
0.9847
0.9817
0.9784
0.9748
0.9710
0.9669
0.9625
0.9579
0.9530
0.9479
0.9426
0.9369
0.9311
0.9250
0.9187
0.9121
0.9054
0.8984
0.8912
0.8837
0.8761
0.8683
0.8602
0.8520
0.8436
0.8350
0.8262
0.8172
0.8081
0.7988
0.7893
0.7797
0.7700
0.7601
0.7501
0.7400
0.7297
0.7193
0.7088
0.6982
0.6875
0.6767
0.6659
0.6549
0.6439
0.6329
0.6217
0.6106
0.5993
0.5881
0.5768
0.5655
0.5542
0.5428
0.5315
0.5202
0.5089
0.4976
0.4863
0.4750
0.4638
0.4527
0.4416
0.4305
0.4196
0.4087
0.3979
0.3871
0.3765
0.3659
0.3555
0.3452
0.3350
0.3249
0.3149
0.3051
0.2954
0.2859
0.2765
0.2673
0.2583
0.2494
0.2407
0.2322
0.2239
0.2157
0.2078
0.2001
0.1925
0.1852
0.1781
0.1712
0.1645
0.1581
0.1519
0.1459
0.1402
0.1347
0.1295
0.1245
0.1198
0.1153
0.1111
0.1071
0.1034
0.1000
0.0968
0.0939
0.0913
0.0889
0.0868
0.0850
0.0835
0.0822
0.0813
0.0806
0.0801
0.0800]';



for b = 0:h:(h*(cols-1))
  ft = d(:,1+b/h)';
  ft = [ft, conj(ft([((ftsize/2)):-1:2]))];
  px = real(ifft(ft));
  x((b+1):(b+ftsize)) = x((b+1):(b+ftsize))+px.*win;
end;
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值