在常规端点检测函数部分做如下修改:
// Conventional (energy + zero-crossing-rate) voice endpoint detection.
//
// Steps, all operating on per-frame data held by the document:
//   1. Run OnPower() / OnZero() and derive the energy and ZCR thresholds.
//   2. Tag every frame whose energy exceeds energy_high ("main voiced" frames)
//      and grow each run of such frames outwards over lower-energy frames.
//   3. Collect still-untagged frames that pass the low thresholds ("suspect"
//      frames), group them into runs, and tag a run when it is attached to an
//      already tagged segment or looks like speech on its own.
//   4. Store the [head, tail] frame range of every tagged segment in vad[].
//   5. Inside each segment, untag local energy troughs whose surrounding
//      zero-crossing pattern matches the conditions below.
//
// Results are written to pDoc->tag_Norm (1 = speech frame, 0 = other), to the
// global vad[] table and to the m_EnergyLevel* / m_ZcrThreshold members.
// e1, e3, e4 and zz are scale factors declared outside this function.
//
// NOTE(review): pDoc->dataout_energy is assumed to hold at least as many
// frames as pDoc->zero_one - confirm against OnPower()/OnZero().
void CMyWaveView::NormalvadDefault()
{
    CMyWaveDoc* pDoc = GetDocument();
    ASSERT_VALID(pDoc);
    if (!pDoc)
        return;

    // Modification 1: pre-emphasis and Hamming windowing (done in the methods class).
    OnPower();
    OnZero();

    m_minForZcr = pDoc->minForOneZero;
    m_maxForZcr = pDoc->maxForOneZero;
    m_avgForZcr = pDoc->avgForOneZero;

    int i, j;
    const int nCount = static_cast<int>(pDoc->zero_one.size());

    // One tag per frame, all initially "not speech".
    pDoc->tag_Norm.clear();
    pDoc->tag_Norm.resize(nCount, 0);

    // m_avgForEnergy is presumably refreshed by OnPower() - TODO confirm.
    const double avg = m_avgForEnergy;
    const double energy_high      = avg * e1;
    const double energy_low       = avg * 0.04;   // Modification 2: threshold value
    const double energy_consonant = avg * e3;
    const double energy_suspect   = avg * e4;
    const double zcrThreshold     = m_avgForZcr + zz * (m_maxForZcr - m_avgForZcr);

    m_EnergyLevel1 = energy_high;
    m_EnergyLevel2 = energy_low;
    m_EnergyLevel3 = energy_consonant;
    m_EnergyLevel4 = energy_suspect;
    m_ZcrThreshold = zcrThreshold;

    /**** Find the main voiced segments ****/
    // sound holds (first, last) frame pairs, one pair per run of strong frames.
    std::vector<int> sound;
    {
        std::vector<int> voiceIndex;
        for (i = 0; i < nCount; i++)
        {
            if (pDoc->dataout_energy[i] > energy_high)
            {
                voiceIndex.push_back(i);
                pDoc->tag_Norm[i] = 1;
            }
        }

        // No strong frame at all: leave sound empty instead of indexing an
        // empty vector.
        if (!voiceIndex.empty())
        {
            const int nums = static_cast<int>(voiceIndex.size());
            sound.push_back(voiceIndex[0]);
            // Start at 0 so that a gap right after the first strong frame is
            // detected too (same scan as used for the suspect frames below).
            for (i = 0; i < nums - 1; i++)
            {
                if (voiceIndex[i + 1] - voiceIndex[i] > 1)
                {
                    sound.push_back(voiceIndex[i]);
                    sound.push_back(voiceIndex[i + 1]);
                }
            }
            sound.push_back(voiceIndex[nums - 1]);
        }
    }   // voiceIndex goes out of scope here, releasing its memory

    /**** Find medium-amplitude and regular consonant frames ****/
    int head = 0;
    int tail = 0;
    const int nSound = static_cast<int>(sound.size() / 2);
    for (i = 0; i < nSound; i++)
    {
        // Grow to the left while the previous frame passes the low energy
        // threshold, or the consonant threshold together with a high ZCR.
        head = sound[2 * i];
        while ((head - 1) >= 0
               && (pDoc->dataout_energy[head - 1] > energy_low
                   || (pDoc->dataout_energy[head - 1] > energy_consonant
                       && pDoc->zero_one[head - 1] > zcrThreshold)))
        {
            head--;
            pDoc->tag_Norm.at(head) = 1;
        }
        sound[2 * i] = head;

        // Grow to the right on energy alone.
        tail = sound[2 * i + 1];
        while ((tail + 1) < nCount && pDoc->dataout_energy[tail + 1] > energy_low)
        {
            tail++;
            pDoc->tag_Norm.at(tail) = 1;
        }
        sound[2 * i + 1] = tail;
    }

    /**** Find generalized-amplitude frames that do not depend on a main segment ****/
    std::vector<int> soundSuspect;
    for (i = 1; i < nCount; i++)   // start at 1: skip frame 0 to avoid initial phone noise
    {
        if (pDoc->tag_Norm.at(i) == 0
            && (pDoc->dataout_energy[i] > energy_low
                || (pDoc->dataout_energy[i] > energy_consonant
                    && pDoc->zero_one[i] > zcrThreshold)))
        {
            soundSuspect.push_back(i);
        }
    }

    /**** Look for suspected consonants among those frames ****/
    if (soundSuspect.size() > 1)
    {
        // suspect_terminal holds (first, last) frame pairs of suspect runs.
        std::vector<int> suspect_terminal;
        const int nSuspect = static_cast<int>(soundSuspect.size());
        suspect_terminal.push_back(soundSuspect[0]);
        for (i = 0; i < nSuspect - 1; i++)
        {
            if (soundSuspect[i + 1] - soundSuspect[i] > 1)
            {
                suspect_terminal.push_back(soundSuspect[i]);
                suspect_terminal.push_back(soundSuspect[i + 1]);
            }
        }
        suspect_terminal.push_back(soundSuspect[nSuspect - 1]);

        /**** Examine each suspected speech segment ****/
        const int nRuns = static_cast<int>(suspect_terminal.size() / 2);
        for (i = 0; i < nRuns; i++)
        {
            // k1: length of the suspect run; k2: number of weaker frames that
            // follow it and still pass the relaxed thresholds.
            const int k1 = suspect_terminal[2 * i + 1] - suspect_terminal[2 * i] + 1;
            int k2 = 0;
            head = suspect_terminal[2 * i];          // always >= 1 (frame 0 was skipped)
            tail = suspect_terminal[2 * i + 1] + 1;

            // Bounds check first, then the tag lookup.
            for (; tail < nCount && pDoc->tag_Norm[tail] == 0; tail++)
            {
                if (pDoc->dataout_energy[tail] > energy_consonant
                    || (pDoc->dataout_energy[tail] > energy_suspect
                        && pDoc->zero_one[tail] > zcrThreshold))
                    k2++;
                else
                    break;
            }

            // tail may equal nCount here; treat "past the end" as not connected.
            const bool rightConnected = (tail < nCount) && (pDoc->tag_Norm[tail] == 1);
            // head - 2 is only a valid frame when head >= 2.
            const bool leftConnected = (head >= 2) && (pDoc->tag_Norm[head - 2] == 1);
            // The frame just before the run, judged with the relaxed thresholds.
            const bool prevFrameIsSuspect =
                pDoc->dataout_energy[head - 1] > energy_suspect
                && pDoc->zero_one[head - 1] > zcrThreshold;

            if (k2 <= k1 && rightConnected)   /*** joins a main segment on the right, e.g. an unvoiced consonant ***/
            {
                for (j = head; j < tail; j++)
                    pDoc->tag_Norm[j] = 1;
            }
            else if (leftConnected)           /*** joins a main segment on the left, e.g. a final velar nasal ***/
            {
                if (prevFrameIsSuspect)
                    pDoc->tag_Norm[head - 1] = 1;   // check the preceding frame
                // When the trailing part is longer than the run itself, drop
                // the trailing suspect frames and keep only the run.
                const int tagEnd = (k2 <= k1) ? tail : tail - k2;
                for (j = head; j < tagEnd; j++)
                    pDoc->tag_Norm[j] = 1;
            }
            /*** not connected: decide from ZCR and energy margin, e.g. an isolated low-amplitude male voice ***/
            else if (k2 <= k1
                     && pDoc->zero_one[head] > zcrThreshold
                     && (pDoc->dataout_energy[head] > 1.5 * energy_consonant
                         || (head + 1 < nCount
                             && pDoc->dataout_energy[head + 1] > 1.5 * energy_consonant)))
            {
                if (prevFrameIsSuspect)
                    pDoc->tag_Norm[head - 1] = 1;   // check the preceding frame
                for (j = head; j < tail; j++)
                    pDoc->tag_Norm[j] = 1;
            }
        }
    }

    /**** Modification 3: store head and tail of every separate speech segment ****/
    // Never write past the end of the fixed-size vad[] table.
    const int maxSegments = static_cast<int>(sizeof(vad) / sizeof(vad[0]));
    int npoint = 0;
    bool inSegment = false;
    for (i = 0; i < nCount && npoint < maxSegments; i++)
    {
        const bool voiced = (pDoc->tag_Norm[i] == 1);
        if (voiced && !inSegment)
        {
            vad[npoint].head = i;          // also covers a segment starting at frame 0
            inSegment = true;
        }
        else if (!voiced && inSegment)
        {
            vad[npoint++].tail = i - 1;
            inSegment = false;
        }
    }
    if (inSegment && npoint < maxSegments)
        vad[npoint++].tail = nCount - 1;   // segment that runs to the last frame

    /**** Untag energy troughs inside each segment ****/
    int x1 = 0;
    float gapv1, gapv2, gapv3, gapv4;
    for (i = 0; i < npoint; i++)
    {
        // Stay away from the segment's head and tail; this also keeps
        // x1 - 1 and x1 + 5 inside the segment.
        for (x1 = vad[i].head + 6; x1 < vad[i].tail - 5; x1++)
        {
            // Only strict local energy minima are candidates.
            if (!(pDoc->dataout_energy[x1] < pDoc->dataout_energy[x1 - 1]
                  && pDoc->dataout_energy[x1] < pDoc->dataout_energy[x1 + 1]))
                continue;

            // Relative depth of the trough against its left and right
            // neighbours, and the relative asymmetry between the neighbours.
            gapv1 = (pDoc->dataout_energy[x1 - 1] - pDoc->dataout_energy[x1]) / pDoc->dataout_energy[x1];
            gapv2 = (pDoc->dataout_energy[x1 + 1] - pDoc->dataout_energy[x1]) / pDoc->dataout_energy[x1];
            // Absolute value taken by hand so that no integer abs() overload
            // can truncate the difference.
            double sideDiff = pDoc->dataout_energy[x1 + 1] - pDoc->dataout_energy[x1 - 1];
            if (sideDiff < 0)
                sideDiff = -sideDiff;
            gapv3 = sideDiff / pDoc->dataout_energy[x1];
            gapv4 = pDoc->zero_one[x1 + 2] + pDoc->zero_one[x1 + 3] + pDoc->zero_one[x1 + 4]
                    + pDoc->zero_one[x1 + 5] - 4 * pDoc->zero_one[x1 + 1];
            // Make gapv1 the larger and gapv2 the smaller of the two depths.
            if (gapv1 < gapv2) { float ttmp = gapv1; gapv1 = gapv2; gapv2 = ttmp; }

            // Zero-crossing values around the trough, copied into locals so
            // the predicate below stays readable.
            const double zPrev = pDoc->zero_one[x1 - 1];
            const double z0 = pDoc->zero_one[x1];
            const double z1 = pDoc->zero_one[x1 + 1];
            const double z2 = pDoc->zero_one[x1 + 2];
            const double z3 = pDoc->zero_one[x1 + 3];
            const double z4 = pDoc->zero_one[x1 + 4];
            const double z5 = pDoc->zero_one[x1 + 5];

            const bool deepEnough = gapv1 > 0.16 && gapv2 > 0.0568 && gapv3 > 0.02;
            const bool zcrFalling = (z0 < zPrev) && (z0 > z1);
            const bool zcrMinimum = (z0 < z1) && (z0 < z2) && (z0 < z3);
            const bool zcrSlowRise = gapv4 < 10 && z1 <= z2 && z1 <= z3 && z0 <= z4 && z0 < z5;
            // The last term was written "! a == b", which parses as
            // "(!a) == b"; the intended test is inequality.
            const bool zcrStepRise =
                (z1 < z2 || (z1 == z2 && z2 <= z3))
                && (z2 < z3 || (z0 == z1 && z2 == z3))
                && (z0 != zPrev);

            if (deepEnough && !zcrFalling && (zcrMinimum || zcrSlowRise || zcrStepRise))
                pDoc->tag_Norm[x1] = 0;
        }
    }
}
其中 vad为:
// Frame range of one detected speech segment; NormalvadDefault() stores the
// index of the segment's first tagged frame in head and of its last tagged
// frame in tail.
struct Endpoint
{
    int head;   // first frame of the segment
    int tail;   // last frame of the segment
};

// Fixed-size table of detected segments (room for 500 entries).
Endpoint vad[500];