1 模型
2 部分代码
function trimmed_X = my_vad(x, Fs)
%MY_VAD  Endpoint detection: cut leading/trailing non-speech from a recording.
%
%   trimmed_X = MY_VAD(x) treats x as a single-channel signal sampled at
%   16 kHz and returns the portion of x judged to contain the useful signal.
%   trimmed_X = MY_VAD(x, Fs) uses sampling rate Fs (Hz) instead.
%
%   Inputs
%     x   numeric vector (row or column). The first 0.1 s is taken to be
%         silence and is used to estimate the noise statistics.
%     Fs  (optional) sampling frequency in Hz, default 16000.
%
%   Output
%     trimmed_X  x(x_start:x_finish), same orientation as x.
%
%   Method (two-stage decision)
%     1. Frame the signal (10 ms window, 5 ms hop) and compute, per frame,
%        the short-time average magnitude (Hamming-weighted) and the number
%        of strict sign changes (zero crossings).
%     2. Stage 1 (energy): locate the first/last frame above the upper
%        threshold ITU, then extend outwards to the nearest frame that
%        falls below the lower threshold ITL.
%     3. Stage 2 (zero-crossing rate): look up to 25 frames further out;
%        if more than 3 of them exceed the ZCR threshold, move the
%        endpoint to the outermost such frame.
%
%   Notes
%     * If no frame exceeds ITU, the result is x(1:start of last frame).
%     * The end cut is placed at the first sample of frame (finish-1),
%       exactly as in the original implementation.
%     * Requires HAMMING (Signal Processing Toolbox / Octave signal pkg).
%
%   Errors
%     my_vad:invalidInput   x is not a vector, or Fs is not a positive scalar
%                           large enough to give a hop of at least 1 sample.
%     my_vad:inputTooShort  x is shorter than one analysis window.

    if nargin < 2 || isempty(Fs)
        Fs = 16000;                      % default sampling frequency (Hz)
    end
    if ~isscalar(Fs) || ~(Fs > 0)
        error('my_vad:invalidInput', 'Fs must be a positive scalar.');
    end
    if ~isvector(x)
        error('my_vad:invalidInput', 'x must be a single-channel vector.');
    end

    % ---- fixed analysis parameters -------------------------------------
    Ini       = 0.1;     % assumed initial silence duration (s)
    Ts        = 0.01;    % window duration (s)
    Tsh       = 0.005;   % frame shift (s)
    ZTh       = 40;      % upper bound for the zero-crossing threshold
    MaxSearch = 25;      % max frames inspected by the ZCR refinement

    w_sam = fix(Ts*Fs);                  % window length in samples
    o_sam = fix(Tsh*Fs);                 % frame shift in samples
    if o_sam < 1 || w_sam < 2
        error('my_vad:invalidInput', ...
              'Fs is too low for a %g s window / %g s hop.', Ts, Tsh);
    end

    lengthX = length(x);
    if lengthX < w_sam
        error('my_vad:inputTooShort', ...
              'Input has %d samples; at least %d are required.', ...
              lengthX, w_sam);
    end

    segs = fix((lengthX - w_sam)/o_sam) + 1;      % number of frames
    % Number of frames inside the initial silence; clamped so that very
    % short recordings do not index past the last frame.
    sil  = min(fix((Ini - Ts)/Tsh) + 1, segs);
    win  = hamming(w_sam);                        % column vector
    FrmIndex = 1:o_sam:(o_sam*(segs - 1) + 1);    % first sample of each frame

    % Column working copy so that frame .* win is always element-wise,
    % whatever the orientation of x. The output is sliced from x itself.
    xc = x(:);

    % ---- per-frame features --------------------------------------------
    ZCR_Vector = zeros(1, segs);   % zero crossings per frame
    Erg_Vector = zeros(1, segs);   % short-time average magnitude per frame
    for t = 1:segs
        frame = xc(FrmIndex(t):FrmIndex(t) + w_sam - 1);
        cur   = frame(2:end);
        prev  = frame(1:end-1);
        % Count strict sign changes only; a sample equal to zero never
        % contributes a crossing.
        ZCR_Vector(t) = sum((cur > 0 & prev < 0) | (cur < 0 & prev > 0));
        Erg_Vector(t) = sum(abs(frame .* win));
    end

    % ---- thresholds from the initial silence ---------------------------
    IMN = mean(Erg_Vector(1:sil));       % noise (silence) mean magnitude
    IMX = max(Erg_Vector);               % peak short-time magnitude
    I1  = 0.03*(IMX - IMN) + IMN;        % I1, I2: initial energy thresholds
    I2  = 4*IMN;
    ITL = 100*min(I1, I2);               % lower energy threshold; the factor
                                         % is empirical, tune to the data
    ITU = 10*ITL;                        % upper energy threshold

    IZC   = mean(ZCR_Vector(1:sil));     % mean ZCR during silence
    stdev = std(ZCR_Vector(1:sil));      % ZCR standard deviation in silence
    IZCT  = min(ZTh, IZC + 2*stdev);     % zero-crossing threshold

    % ---- stage 1: energy-based endpoints -------------------------------
    % Start: from the first frame above ITU walk backwards to the nearest
    % frame below ITL; the start is the frame right after it.
    start = 1;
    firstHigh = find(Erg_Vector > ITU, 1, 'first');
    if ~isempty(firstHigh)
        lastLowBefore = find(Erg_Vector(1:firstHigh) < ITL, 1, 'last');
        if ~isempty(lastLowBefore)
            start = lastLowBefore + 1;
        end
    end

    % Finish: from the last frame above ITU walk forwards to the nearest
    % frame below ITL. segs+1 means "no such frame" (keep to the end).
    finish = segs + 1;
    lastHigh = find(Erg_Vector > ITU, 1, 'last');
    if ~isempty(lastHigh)
        firstLowAfter = find(Erg_Vector(lastHigh:end) < ITL, 1, 'first');
        if ~isempty(firstLowAfter)
            finish = lastHigh + firstLowAfter - 1;
        end
    end

    % ---- stage 2: zero-crossing refinement -----------------------------
    % Backward from the start frame (the start frame itself is included).
    BackSearch = min(start, MaxSearch);
    lo = start - BackSearch + 1;
    backHits = find(ZCR_Vector(lo:start) > IZCT);
    if numel(backHits) > 3
        start = lo + backHits(1) - 1;    % earliest frame above the threshold
    end

    % Forward from the frame after finish; the range is empty when finish
    % is already at or beyond the last frame.
    FwdSearch = min(segs - finish, MaxSearch);
    fwdHits = find(ZCR_Vector(finish + 1:finish + FwdSearch) > IZCT);
    if numel(fwdHits) > 3
        finish = finish + fwdHits(end);  % latest frame above the threshold
    end

    % ---- map frames back to samples ------------------------------------
    x_start  = FrmIndex(start);          % final start sample
    x_finish = FrmIndex(finish - 1);     % final end sample
    trimmed_X = x(x_start:x_finish);
end
3 仿真结果
4 参考文献
[1] 张军, & 李学斌. (2009). 一种基于DTW的孤立词语音识别算法. 计算机仿真(10), 358-361+374.