1. 语音信号的数学模型
根据语音产生的机理,可以将语音发声系统分为3个子系统:声门(声带)以下的部分称为“声门子系统”,它负责产生激励振动,即“激励子系统”;从声门到嘴唇的呼气通道是声道,即“声道系统”;语音从嘴唇辐射出去,所以嘴唇以外是“辐射系统”。人类的发声是一个复杂的过程。
语音发声系统的完整数学模型可表示为:$H(z) = G(z)\,V(z)\,R(z)$,其中 $G(z)$、$V(z)$、$R(z)$ 分别为激励(声门)子系统、声道系统和辐射系统的传递函数。
2. 特征参数提取
2.1 线性预测系数LPC(Linear Prediction Coefficient)
提取LPC特征的程序如下:
function b = addwin(f)
% ADDWIN  Window each frame and extract 12 LPC-cepstrum values per frame.
%   b = ADDWIN(f) takes a matrix f with one frame per row (as produced by
%   f = enframe(py, win, inc)), applies a Hamming window to every frame,
%   computes its LPC coefficients, takes the complex cepstrum of those
%   coefficients and keeps elements 2..13 (12 values).
%
%   The result b is a single row vector: the 12 values of frame 1,
%   followed by the 12 values of frame 2, and so on.
numFrames = size(f, 1);
frameLen  = size(f, 2);
% hamming() returns a column vector while each frame is a row; transpose
% it once so that the element-wise product stays 1-by-frameLen.
win = hamming(frameLen)';
numCep = 12;
b = zeros(1, numCep * numFrames);
for i = 1 : numFrames
    y = f(i, :);              % i-th frame (row vector)
    p = y .* win;             % apply the window
    % NOTE(review): lpc is called without an explicit order, exactly as in
    % the original listing; confirm the default order is what is intended.
    c = lpc(p);               % LPC coefficients
    d = cceps(c);             % complex cepstrum of the LPC coefficients
    b((i - 1) * numCep + (1 : numCep)) = d(2 : 13);   % keep elements 2..13
end
return
2.2 美尔倒谱系数MFCC(Mel Frequency Cepstral Coefficients)
提取MFCC特征的代码:
function ccc = mfcc(x)
% MFCC  Mel-frequency cepstral coefficients plus first-order deltas.
%   ccc = MFCC(x) pre-emphasises the signal x, splits it into frames of
%   256 samples with a hop of 80 samples, and for every frame computes 12
%   liftered cepstral coefficients from a 24-channel mel filter bank.
%   First-order difference (delta) coefficients are appended, so each row
%   of ccc holds [12 MFCC, 12 delta MFCC] for one frame. The first and
%   last two frames are dropped because their deltas are not computed.
%
%   Requires melbankm (filter-bank design), enframe and hamming.

frameLen = 256;      % samples per frame / FFT length
frameInc = 80;       % hop between frames
numChan  = 24;       % mel filter-bank channels
numCep   = 12;       % cepstral coefficients kept per frame
halfSpec = 129;      % number of spectrum bins passed to the filter bank

% Mel filter bank, scaled so that its largest coefficient is 1
bank = melbankm(numChan, frameLen, 8000, 0, 0.5, 'm');
bank = full(bank);
bank = bank / max(bank(:));

% DCT matrix, numCep-by-numChan
n = 0 : numChan - 1;
dctcoef = zeros(numCep, numChan);
for k = 1 : numCep
    dctcoef(k, :) = cos((2 * n + 1) * k * pi / (2 * numChan));
end

% Cepstral lifter window, scaled to a maximum of 1
w = 1 + 6 * sin(pi * (1 : numCep) ./ numCep);
w = w / max(w);

% Pre-emphasis filter y(n) = x(n) - 0.9375 * x(n-1).
% The coefficient vector must be written [1 -0.9375]; with blanks on both
% sides of the minus sign MATLAB evaluates a subtraction and the filter
% degenerates into a plain gain of 0.0625.
xx = double(x);
xx = filter([1 -0.9375], 1, xx);

% Split the signal into frames, one per row
xx = enframe(xx, frameLen, frameInc);

% MFCC of every frame
numFrames = size(xx, 1);
win = hamming(frameLen);
m = zeros(numFrames, numCep);
for i = 1 : numFrames
    y = xx(i, :);
    s = y' .* win;                 % windowed frame (column vector)
    t = abs(fft(s));
    t = t .^ 2;                    % power spectrum
    c1 = dctcoef * log(bank * t(1 : halfSpec));
    c2 = c1 .* w';                 % apply the lifter
    m(i, :) = c2';
end

% First-order difference coefficients (left at zero for the two frames at
% each end, which do not have both neighbours available)
dtm = zeros(size(m));
for i = 3 : size(m, 1) - 2
    dtm(i, :) = -2 * m(i - 2, :) - m(i - 1, :) + m(i + 1, :) + 2 * m(i + 2, :);
end
dtm = dtm / 3;

% Concatenate static and delta coefficients
ccc = [m dtm];

% Drop the first and last two frames, whose delta coefficients are zero.
% A colon (row range) is required here, not a comma.
ccc = ccc(3 : size(m, 1) - 2, :);
return
其中enframe的代码[2]如下:
function f=enframe(x,win,inc)
%ENFRAME  Cut a signal into (possibly overlapping) frames, one frame per row.
%   F = ENFRAME(X,LEN) returns a matrix whose rows are consecutive,
%   non-overlapping segments of the vector X, each LEN samples long.
%   Trailing samples that do not fill a whole frame are discarded.
%
%   F = ENFRAME(X,LEN,INC) starts a new frame every INC samples, so frame
%   I covers X((I-1)*INC+1 : (I-1)*INC+LEN). The number of frames is
%   fix((length(X)-LEN+INC)/INC).
%
%   F = ENFRAME(X,WINDOW) or F = ENFRAME(X,WINDOW,INC) uses
%   length(WINDOW) as the frame length and multiplies every frame
%   element-wise by WINDOW(:)'. A scalar second argument is always
%   interpreted as a frame length, never as a window.

sigLen = length(x);
winLen = length(win);

% A scalar "win" is the frame length itself; otherwise it is a window
% whose length defines the frame length.
frameLen = winLen;
if winLen == 1
    frameLen = win;
end

% Without an explicit increment the frames do not overlap.
if nargin < 3
    inc = frameLen;
end

numFrames = fix((sigLen - frameLen + inc) / inc);

% Index matrix: row r holds the sample indices belonging to frame r.
frameStart = inc * (0:(numFrames - 1)).';
sampleOffs = 1:frameLen;
idx = repmat(frameStart, 1, frameLen) + repmat(sampleOffs, numFrames, 1);

f = zeros(numFrames, frameLen);
f(:) = x(idx);

% Apply the window to every row when one was supplied.
if winLen > 1
    winRow = win(:)';
    f = f .* repmat(winRow, numFrames, 1);
end
加矩形窗的短时能量函数:
% Short-time energy of a speech signal for rectangular windows of
% increasing length. Subplot 1 shows the waveform; subplots 2..6 show the
% short-time energy for window lengths 32, 64, 128, 256 and 512 samples.

% wavread is not available in current MATLAB releases; use audioread when
% it exists and fall back to wavread otherwise.
if exist('audioread', 'file')
    a = audioread('F:/WO.wav');
else
    a = wavread('F:/WO.wav');
end
a = a(:, 1);                      % conv needs a vector: keep the first channel
subplot(6,1,1), plot(a);
N = 32;                           % shortest window length
for i = 2:6
    % Double the window length at every step so that it matches the
    % legend (32, 64, 128, 256, 512). The previous expression (i-1)*N
    % produced 32, 64, 96, 128, 160 under the same legends.
    winLen = N * 2^(i - 2);
    h = linspace(1, 1, winLen);   % rectangular window of length winLen
    En = conv(h, a.*a);           % short-time energy: window convolved with a^2
    subplot(6,1,i), plot(En);
    legend(sprintf('N=%d', winLen));
end
加hamming窗的短时能量函数:
把生成矩形窗的语句h=linspace(1,1, (i-1)*N);
改为h=hamming((i-1)*N);(窗长参数保持不变;变量名必须仍为h,后面的conv(h,...)才会使用汉明窗)
加矩形窗的短时平均幅度:
% Short-time average magnitude of a speech signal for rectangular windows
% of increasing length. Subplot 1 shows the waveform; subplots 2..6 show
% the short-time magnitude for window lengths 32, 64, 128, 256 and 512.

% wavread is not available in current MATLAB releases; use audioread when
% it exists and fall back to wavread otherwise.
if exist('audioread', 'file')
    a = audioread('F:/WO.wav');
else
    a = wavread('F:/WO.wav');
end
a = a(:, 1);                      % conv needs a vector: keep the first channel
subplot(6,1,1), plot(a);
N = 32;                           % shortest window length
for i = 2:6
    % Double the window length at every step so that it matches the
    % legend (32, 64, 128, 256, 512). The previous expression (i-1)*N
    % produced 32, 64, 96, 128, 160 under the same legends.
    winLen = N * 2^(i - 2);
    h = linspace(1, 1, winLen);   % rectangular window of length winLen
    Mn = conv(h, abs(a));         % short-time magnitude: window convolved with |a|
    subplot(6,1,i), plot(Mn);
    legend(sprintf('N=%d', winLen));
end
短时过零率:
% Waveform, short-time energy and short-time average zero-crossing rate of
% a speech signal, using a rectangular window of N samples that advances
% by half a window per frame.

% wavread is not available in current MATLAB releases; use audioread when
% it exists and fall back to wavread otherwise.
if exist('audioread', 'file')
    a = audioread('F:/WO.wav');
else
    a = wavread('F:/WO.wav');
end
a = a(:, 1);                      % conv needs a vector: keep the first channel
n = length(a);
N = 320;                          % window length in samples
hop = floor(N / 2);               % frame advance: half a window (160 for N = 320)

subplot(3,1,1), plot(a);

h = linspace(1, 1, N);            % rectangular window of length N
En = conv(h, a.*a);               % short-time energy
subplot(3,1,2), plot(En);

% Sign sequence: +1 for samples >= 0, -1 otherwise. The absolute
% difference of neighbouring signs is 2 at a zero crossing and 0 elsewhere.
sgn = ones(n, 1);
sgn(a < 0) = -1;
signChange = abs(diff(sgn));      % length n-1

% Frames start at 1, 1+hop, ... for as long as the whole window fits
% inside signChange (same condition as k+N-1 < n).
frameStart = 1 : hop : (n - N);
Q = zeros(1, numel(frameStart));
for j = 1 : numel(frameStart)
    k = frameStart(j);
    % Each crossing contributes 2, hence the normalisation by 2*N
    % (640 for N = 320).
    Q(j) = sum(signChange(k : k + N - 1)) / (2 * N);
end
subplot(3,1,3), plot(Q);