合成混合语音(纯净语音+噪声)和IBM

最新推荐文章于 2023-04-05 15:09:05 发布

S20091103372

最新推荐文章于 2023-04-05 15:09:05 发布

阅读量4.2k

点赞数

分类专栏：语音 matlab代码文章标签：数据 gamma db

本文链接：https://blog.csdn.net/S20091103372/article/details/46672619

版权

语音同时被 2 个专栏收录

8 篇文章 0 订阅

订阅专栏

matlab代码

7 篇文章 0 订阅

订阅专栏

有纯净语音，有噪声，怎样合成不同信噪比下的混合语音呢？！

function mixwav_IBM(S,db,nois)

    numChan = 64; %通道数
    FS = 8000;    %数据的采样频率
    gamma = gen_gammaton(FS,numChan);
    ft_env = fir1(1024,[70,400]/FS*2,'bandpass');
    [v,fs] = wavread(['babble.wav']);
    v = resample(v,FS,fs);                
    noi.wav = repmat(v,fix(FS*10/length(v))+1,1);
    noi.wav = noi.wav(1:FS*10);
    [~,gf] = fgammaton(noi.wav',gamma,FS,numChan);
    noi.cch = cochleagram(gf');

    [w,fsw] = wavread(['F50A1.WAV']); 
     w = resample(w,FS,fsw); 
    [~,gf] = fgammaton(w',gamma,FS,numChan);
    scch = cochleagram(gf');
    v = noi.wav;

    nLen = min(length(w),length(v));
    w = w(1:nLen); %w是纯净的语音
    v = v(1:nLen); %v是噪声
    db = 0;%混合语音的信噪比是0db
    [m,v] = wavmix(w,v,db);%m是混合语音的波形

     winsize = 256;  
     shiftsize = 80;  
     window = hamming(winsize);  
     winc_w = getframe(w,winsize,shiftsize,window);%纯净语音分帧  
     winc_v = getframe(v,winsize,shiftsize,window); %噪声分帧
     winc_m = getframe(m,winsize,shiftsize,window); %混合语音分帧
     w_fft = abs(fft(winc_w));%纯净语音的fft
     v_fft = abs(fft(winc_v));%噪声语音的fft
     m_fft = abs(fft(winc_m));%混合语音的fft
     amp = zeros(size(w_fft));%掩码的size和原始频谱图的size一样大
     amp(w_fft>v_fft) = 1;

     wavwrite(m,FS,'F50A1_babble.WAV');

     figure;imagesc(w_fft);title('纯净语音')
     figure;imagesc(v_fft);title('噪声')
     figure;imagesc(m_fft);title('混合语音')
     figure;imagesc(amp);title('IBM')
     figure;imagesc(m_fft.*amp);title('IBM盖在混合语音语谱图上得到降噪之后的语音语谱')
     close(figure);
     %如果正确，那么将IBM盖在混合语音的语谱图上 则会得到和纯净语音一样的语谱图
     %纯净语音的语谱图与IBM之后的语谱还是不太一样
end

function [m,v] = wavmix(w,v,db)
   amp = sqrt(sum(w.^2)/sum(v.^2)/10^(db/10));
   m = w + v*amp;
   v = v*amp;
end

  function winc = getframe(wav,winsize,shiftsize,window)  
    slen = length(wav);  
    numframe = ceil(slen/shiftsize);  
    winc = zeros(winsize,numframe);  
    for n = 1:numframe  
        st = (n-1)*shiftsize;  
        ed = min(st+winsize,slen);  
        winc(1:ed-st,n) = wav(st+1:ed);  
        winc(:,n) = winc(:,n).*window;          
    end  
end

function gt = gen_gammaton(sampFreq, numChannel)
    % Create gammatone filterbank
    % Written by Yang Shao, and adapted by Xiaojia Zhao in Oct'11

    if ~exist('sampFreq', 'var')
        sampFreq = 8000; % sampling frequency, default is 8000
    end


    if  ~exist('numChannel', 'var')
        numChannel = 64; % number of channels, default is 64
    end


    stepBound=200000; 

    filterOrder=4;     % filter order

    phase(1:numChannel)=zeros(numChannel,1);        % original phase
    erb_b=hz2erb([50,sampFreq/2]);       % upper and lower bound of ERB
    erb=[erb_b(1):diff(erb_b)/(numChannel-1):erb_b(2)];     % ERB segment
    cf=erb2hz(erb);       % center frequency
    b=1.019*24.7*(4.37*cf/1000+1);       % rate of decay

    gt=zeros(numChannel,stepBound);

    tmp_t=[1:stepBound]/sampFreq;

    for i=1:numChannel

        %gain=10^((loudness(cf(i))-60)/20)/3*(2*pi*b(i)/sampFreq).^4;
        gain = (2*pi*b(i)/sampFreq).^4/3;
        gt(i,:)=gain*sampFreq^3*tmp_t.^(filterOrder-1).*exp(-2*pi*b(i)*tmp_t).*cos(2*pi*cf(i)*tmp_t+phase(i));
    end
end

function erb=hz2erb(hz)
    % Convert normal frequency scale in hz to ERB-rate scale.
    % Units are number of Hz and number of ERBs.
    % ERB stands for Equivalent Rectangular Bandwidth.
    % Written by ZZ Jin, and adapted by DLW in Jan'07

    erb=21.4*log10(4.37e-3*hz+1);

end
function hz=erb2hz(erb)
    % Convert ERB-rate scale to normal frequency scale.
    % Units are number of ERBs and number of Hz.
    % ERB stands for Equivalent Rectangular Bandwidth.
    % Written by ZZ Jin, and adapted by DLW in Jan'07

    hz=(10.^(erb/21.4)-1)/4.37e-3;
end

function [f, tmp]=fgammaton(sig, gamma, sampFreq, numChannel)
    % Filter input signal with a gammatone filterbank and generate GF features
    % sig: input signal
    % gamma: gammtone filter impulse responses
    % sampFreq: sampling frequency
    % numChannel: number of channels
    % Written by Yang Shao, and adapted by Xiaojia Zhao in Oct'11

    [ignore,stepBound]=size(sig);

    d = 2^ceil(log2(length(sig)));

    tmp =ifft((fft((ones(numChannel,1)*sig)',d).*fft(gamma(:,1:stepBound)',d)));
    tmp = tmp(1:stepBound,:);
    f=(abs(resample(abs(tmp),100,sampFreq)')).^(1/3);
end

function a = cochleagram(r, winLength,winShift)
    % Generate a cochleagram from responses of a Gammatone filterbank.
    % It gives the log energy of T-F units
    % The first variable is required.
    % winLength: window (frame) length in samples
    % Written by ZZ Jin, and adapted by DLW in Jan'07

    if nargin < 2
        winLength = 320;      % default window length in sample points which is 20 ms for 16 KHz sampling frequency
    end

    [numChan,sigLength] = size(r);     % number of channels and input signal length
    if nargin < 3 
        winShift = winLength/2;        % frame shift (default is half frame)
    end   
    increment = winLength/winShift;    % special treatment for first increment-1 frames

    M = floor(sigLength/winShift);     % number of time frames

    coswin = (1 + cos(2*pi*(0:winLength-1)/winLength - pi))/2;  % calculate a raised cosine window

    % calculate energy for each frame in each channel
    a = zeros(numChan,M);
    for m = 1:M      
        for i = 1:numChan
            if m < increment        % shorter frame lengths for beginning frames
                a(i,m) = r(i,1:m*winShift)*r(i,1:m*winShift)';
            else
                startpoint = (m-increment)*winShift;
                a(i,m) = (r(i,startpoint+1:startpoint+winLength)).*coswin*((r(i,startpoint+1:startpoint+winLength)).*coswin)';
    %             a(i,m) = sum(abs((r(i,startpoint+1:startpoint+winLength))).*coswin);%*((r(i,startpoint+1:startpoint+winLength)))';
            end
        end
    end
end

这里写图片描述

经过掩码掩盖之后，那么怎样从频谱图合成降噪之后的语音呢？！

function wav = syn_mask(n,tt)
% n 噪声谱，n = stft(wav)
% tt 为估计的mask，语音部分为1，其余为0，大小与n相同
if nargin < 2
    tt = ones(size(n));
end
if size(tt,1)~=129
    tt = tt';
end
if size(n,1)~=129
    n = n';
end
wav = istft(n.*tt);
% ang = angle(n);
% wav = istft(exp(tt').*(cos(ang)+1i*sin(ang)))';

function wav = syn_spec(n,tt)
% n 噪声谱，n = stft(wav)
% tt 为估计的幅度谱，大小与n相同
if size(tt,1)==129
    tt = tt';
end
%wav = istft(n.*tt);
ang = angle(n);
wav = istft((tt').*(cos(ang')+1i*sin(ang')))';`这里写代码片`

function d = stft(x)
s = length(x);
f = 256;

win=[0.0800
0.0801
0.0806
0.0813
0.0822
0.0835
0.0850
0.0868
0.0889
0.0913
0.0939
0.0968
0.1000
0.1034
0.1071
0.1111
0.1153
0.1198
0.1245
0.1295
0.1347
0.1402
0.1459
0.1519
0.1581
0.1645
0.1712
0.1781
0.1852
0.1925
0.2001
0.2078
0.2157
0.2239
0.2322
0.2407
0.2494
0.2583
0.2673
0.2765
0.2859
0.2954
0.3051
0.3149
0.3249
0.3350
0.3452
0.3555
0.3659
0.3765
0.3871
0.3979
0.4087
0.4196
0.4305
0.4416
0.4527
0.4638
0.4750
0.4863
0.4976
0.5089
0.5202
0.5315
0.5428
0.5542
0.5655
0.5768
0.5881
0.5993
0.6106
0.6217
0.6329
0.6439
0.6549
0.6659
0.6767
0.6875
0.6982
0.7088
0.7193
0.7297
0.7400
0.7501
0.7601
0.7700
0.7797
0.7893
0.7988
0.8081
0.8172
0.8262
0.8350
0.8436
0.8520
0.8602
0.8683
0.8761
0.8837
0.8912
0.8984
0.9054
0.9121
0.9187
0.9250
0.9311
0.9369
0.9426
0.9479
0.9530
0.9579
0.9625
0.9669
0.9710
0.9748
0.9784
0.9817
0.9847
0.9875
0.9899
0.9922
0.9941
0.9958
0.9972
0.9983
0.9991
0.9997
1.0000
1.0000
0.9997
0.9991
0.9983
0.9972
0.9958
0.9941
0.9922
0.9899
0.9875
0.9847
0.9817
0.9784
0.9748
0.9710
0.9669
0.9625
0.9579
0.9530
0.9479
0.9426
0.9369
0.9311
0.9250
0.9187
0.9121
0.9054
0.8984
0.8912
0.8837
0.8761
0.8683
0.8602
0.8520
0.8436
0.8350
0.8262
0.8172
0.8081
0.7988
0.7893
0.7797
0.7700
0.7601
0.7501
0.7400
0.7297
0.7193
0.7088
0.6982
0.6875
0.6767
0.6659
0.6549
0.6439
0.6329
0.6217
0.6106
0.5993
0.5881
0.5768
0.5655
0.5542
0.5428
0.5315
0.5202
0.5089
0.4976
0.4863
0.4750
0.4638
0.4527
0.4416
0.4305
0.4196
0.4087
0.3979
0.3871
0.3765
0.3659
0.3555
0.3452
0.3350
0.3249
0.3149
0.3051
0.2954
0.2859
0.2765
0.2673
0.2583
0.2494
0.2407
0.2322
0.2239
0.2157
0.2078
0.2001
0.1925
0.1852
0.1781
0.1712
0.1645
0.1581
0.1519
0.1459
0.1402
0.1347
0.1295
0.1245
0.1198
0.1153
0.1111
0.1071
0.1034
0.1000
0.0968
0.0939
0.0913
0.0889
0.0868
0.0850
0.0835
0.0822
0.0813
0.0806
0.0801
0.0800];

c = 1;
h = 80;

% pre-allocate output array
d = complex(zeros((1+f/2),1+fix((s-f)/h)));

for b = 0:h:(s-f)
  u = win.*x((b+1):(b+f));
  t = fft(u);
  d(:,c) = t(1:(1+f/2))';
  c = c+1;
end;

function x = istft(d)
% X = istft(D, F, W, H)                   Inverse short-time Fourier transform.
%   Performs overlap-add resynthesis from the short-time Fourier transform 
%   data in D.  Each column of D is taken as the result of an F-point 
%   fft; each successive frame was offset by H points. Data is 
%   hamm-windowed at W pts.  
%       W = 0 gives a rectangular window; W as a vector uses that as window.
% dpwe 1994may24.  Uses built-in 'ifft' etc.
% $Header: /homes/dpwe/public_html/resources/matlab/pvoc/RCS/istft.m,v 1.4 2009/01/07 04:20:00 dpwe Exp $

ftsize = 256;
h = 80;
s = size(d);

cols = s(2);
xlen = ftsize + (cols-1)*h;
x = zeros(1,xlen);

win=[0.0800
0.0801
0.0806
0.0813
0.0822
0.0835
0.0850
0.0868
0.0889
0.0913
0.0939
0.0968
0.1000
0.1034
0.1071
0.1111
0.1153
0.1198
0.1245
0.1295
0.1347
0.1402
0.1459
0.1519
0.1581
0.1645
0.1712
0.1781
0.1852
0.1925
0.2001
0.2078
0.2157
0.2239
0.2322
0.2407
0.2494
0.2583
0.2673
0.2765
0.2859
0.2954
0.3051
0.3149
0.3249
0.3350
0.3452
0.3555
0.3659
0.3765
0.3871
0.3979
0.4087
0.4196
0.4305
0.4416
0.4527
0.4638
0.4750
0.4863
0.4976
0.5089
0.5202
0.5315
0.5428
0.5542
0.5655
0.5768
0.5881
0.5993
0.6106
0.6217
0.6329
0.6439
0.6549
0.6659
0.6767
0.6875
0.6982
0.7088
0.7193
0.7297
0.7400
0.7501
0.7601
0.7700
0.7797
0.7893
0.7988
0.8081
0.8172
0.8262
0.8350
0.8436
0.8520
0.8602
0.8683
0.8761
0.8837
0.8912
0.8984
0.9054
0.9121
0.9187
0.9250
0.9311
0.9369
0.9426
0.9479
0.9530
0.9579
0.9625
0.9669
0.9710
0.9748
0.9784
0.9817
0.9847
0.9875
0.9899
0.9922
0.9941
0.9958
0.9972
0.9983
0.9991
0.9997
1.0000
1.0000
0.9997
0.9991
0.9983
0.9972
0.9958
0.9941
0.9922
0.9899
0.9875
0.9847
0.9817
0.9784
0.9748
0.9710
0.9669
0.9625
0.9579
0.9530
0.9479
0.9426
0.9369
0.9311
0.9250
0.9187
0.9121
0.9054
0.8984
0.8912
0.8837
0.8761
0.8683
0.8602
0.8520
0.8436
0.8350
0.8262
0.8172
0.8081
0.7988
0.7893
0.7797
0.7700
0.7601
0.7501
0.7400
0.7297
0.7193
0.7088
0.6982
0.6875
0.6767
0.6659
0.6549
0.6439
0.6329
0.6217
0.6106
0.5993
0.5881
0.5768
0.5655
0.5542
0.5428
0.5315
0.5202
0.5089
0.4976
0.4863
0.4750
0.4638
0.4527
0.4416
0.4305
0.4196
0.4087
0.3979
0.3871
0.3765
0.3659
0.3555
0.3452
0.3350
0.3249
0.3149
0.3051
0.2954
0.2859
0.2765
0.2673
0.2583
0.2494
0.2407
0.2322
0.2239
0.2157
0.2078
0.2001
0.1925
0.1852
0.1781
0.1712
0.1645
0.1581
0.1519
0.1459
0.1402
0.1347
0.1295
0.1245
0.1198
0.1153
0.1111
0.1071
0.1034
0.1000
0.0968
0.0939
0.0913
0.0889
0.0868
0.0850
0.0835
0.0822
0.0813
0.0806
0.0801
0.0800]';



for b = 0:h:(h*(cols-1))
  ft = d(:,1+b/h)';
  ft = [ft, conj(ft([((ftsize/2)):-1:2]))];
  px = real(ifft(ft));
  x((b+1):(b+ftsize)) = x((b+1):(b+ftsize))+px.*win;
end;