文件 "mfcc.cpp"
#include < math.h >
#include < stdlib.h >
mfcc::mfcc()
... {
melbank();
calcu_hamming();
cal_stren_win();
calcu_dct();
qi=1;
int i,j;
for(i=0;i<5;i++)
for(j=1;j<=256;j++)
...{
buf[i][j]=0;
}
for(i=0;i<=256;i++)
...{
queue[i]=0; workingi[i]=0;
}
Pos=0;
}
/**/ /*%归一化倒谱提升窗口
w=1+6*sin(pi*[1:12]./12);
w=w/max(w);
*/
void mfcc::proc()
... {
int i,j;
double sum;
double proc_buf1[25];
// for(i=256;i>=1;i--)working[i]=working[i]-0.9375*working[i-1];
for(i=256;i>=1;i--)working[i]=working[i]-working[i-1];
// s=y'.*hamming(256);
for(i=1;i<=256;i++)working[i-1]=working[i]*hamming[i];
//同时错一格。
// t=abs(fft(s));
fft();
for(i=256;i>=1;i--)
...{working[i]=working[i-1]*working[i-1]+workingi[i-1]*workingi[i-1];
workingi[i-1]=0;
}
for(i=1;i<=24;i++)
...{
sum=0;
for(j=1;j<=129;j++)
...{
sum=sum+x[i][j]*working[j];
}
if(sum!=0)
proc_buf1[i]=log(sum);
else proc_buf1[i]=-3000;
}
for(i=1;i<=12;i++)
...{
sum=0;
for(j=1;j<=24;j++)
...{
sum=sum+dctcoef[i][j]*proc_buf1[j];
}
buf[Pos][i]=sum*stren_win[i];
}
Pos=(Pos+1)%5;
for(i=1;i<=12;i++)...{
result[i]=buf[(Pos+2)%5][i];
// dtm(i,:)=-2*m(i-2,:)-m(i-1,:)+m(i+1,:)+2*m(i+2,:);
result[i+12]=-2*buf[(Pos)%5][i]
-buf[(Pos+1)%5][i]+buf[(Pos+3)%5][i]+2*buf[(Pos+4)%5][i];
result[i+12]=result[i+12]/3;
}
}
void mfcc::cal_stren_win()
... {
int i;
double b=0.0;
for(i=1;i<=12;i++)...{
stren_win[i]=1+6*sin(pi*(double)i/(double)12);
if(b<stren_win[i])b=stren_win[i];
}
for(i=1;i<=12;i++)...{
stren_win[i]=stren_win[i]/b;
}
}
void mfcc::calcu_hamming()
... {
int i;
for(i=1;i<=256;i++)...{
hamming[i]=0.54-0.46*cos(2*pi*(i-1)/(256-1));
}
}
void mfcc::read( short * data)
... {
int i;
for(i=0;i<INC_MFCC;i++)...{
queue[qi]=(double)data[i];
qi=qi%257+1;
}
Level=0.0;
for(i=0;i<257;i++)...{
working[i]=queue[(qi+i+256)%257+1];
Level=Level+fabs(working[i]);
}
}
void mfcc:: set ()
... {
int i;
for(i=0;i<257;i++) working[i]=i;
// working[1]=1;
}
void mfcc::calcu_dct()
... {
/**//* %DCT系数,12*24
for k=1:12
n=0:23;
dctcoef(k,:)=cos((2*n+1)*k*pi/(2*24));
end
*/
int k,n;
for(k=1;k<=12;k++)
for(n=0;n<=23;n++)
...{
dctcoef[k][n+1]=cos((double)(2*n+1)*k*pi/(double)(2*24));
}
}
void mfcc::melbank()
... {
double f0,fn2,lr;
int b1,b2,b3,b4,k2,k3,k4,mn,mx;
double bl[5];
double pf[LEN_PF_MFCC],fp[LEN_PF_MFCC],pm[LEN_PF_MFCC],v[LEN_PF_MFCC];
int r[LEN_PF_MFCC],c[LEN_PF_MFCC];
int i,j;
for(i=0;i<SIZE_X_X_MFCC;i++)...{
for(j=0;j<SIZE_X_Y_MFCC;j++)...{
x[i][j]=0.0;
}
}
f0=(double)700/(double)FS_MFCC;
// fn2=floor(n/2);
fn2=floor((double)N_MFCC/2);
lr=log((f0+FH_MFCC)/(f0+FL_MFCC))/(P_MFCC+1.0);
/**///// convert to fft bin numbers with 0 for DC term
// bl=N*((f0+FL)*exp([0 1 p p+1]*lr)-f0);
bl[1]=N_MFCC*((f0+FL_MFCC)*exp(0*lr)-f0);
bl[2]=N_MFCC*((f0+FL_MFCC)*exp(1*lr)-f0);
bl[3]=N_MFCC*((f0+FL_MFCC)*exp(P_MFCC*lr)-f0);
bl[4]=N_MFCC*((f0+FL_MFCC)*exp((P_MFCC+1)*lr)-f0);
b2=(int)ceil(bl[2]);
b3=(int)floor(bl[3]);
/**//*if any(w=='y')
pf=log((f0+(b2:b3)/n)/(f0+fl))/lr;
fp=floor(pf);
r=[ones(1,b2) fp fp+1 p*ones(1,fn2-b3)];
c=[1:b3+1 b2+1:fn2+1];
v=2*[0.5 ones(1,b2-1) 1-pf+fp pf-fp ones(1,fn2-b3-1) 0.5];
mn=1;
mx=fn2+1;
else*/
b1=(int)floor(bl[1])+1;
b4=(int)__min(fn2,ceil(bl[4]))-1;
k2=b2-b1+1;
k3=b3-b1+1;
k4=b4-b1+1;
mn=b1+1;
mx=b4+1;
// pf=log((f0+(b1:b4)/n)/(f0+fl))/lr;
for(i=1,j=b1;j<=b4;i++,j++)...{
pf[i]=log(((double)f0+(double)i/(double)N_MFCC)/(f0+FL_MFCC))/lr;
// fp=floor(pf);
fp[i]=floor(pf[i]);
// pm=pf-fp;
pm[i]=pf[i]-fp[i];
}
// k2=b2-b1+1;
// k3=b3-b1+1;
// k4=b4-b1+1;
// r=[fp(k2:k4) 1+fp(1:k3)];
// c=[k2:k4 1:k3];
// v=2*[1-pm(k2:k4) pm(1:k3)];
for(i=1,j=k2;j<=k4;i++,j++)...{
r[i]=(int)fp[j];
c[i]=j;
v[i]=2*(1-pm[j]);
}
for(j=1;j<=k3;j++,i++)...{
r[i]=1+(int)fp[j];
c[i]=j;
v[i]=2*pm[j];
}
// mn=b1+1;
// mx=b4+1;
//if any(w=='n')
// v=1-cos(v*pi/2);
//elseif any(w=='m')
// v=1-0.92/1.08*cos(v*pi/2);
for(j=1;j<i;j++)...{
v[j]=1-0.92/1.08*cos(v[j]*pi/2);
x[r[j]][c[j]+mn-1]=v[j];
}
//end
//if nargout > 1
// x=sparse(r,c,v);
//else
// x=sparse(r,c+mn-1,v,p,1+fn2);
//end
double buf;
for(i=1;i<=24;i++)
for(j=1;j<=129;j++)
if(x[i][j]>buf)buf=x[i][j];
for(i=1;i<=24;i++)
for(j=1;j<=129;j++)
x[i][j]=x[i][j]/buf;
}
void mfcc::fft()
... {
int i,j,k,n,nv2,nm1,l,le,le1,ip,sign=-1;
double tr,ti,ur,ui,wr,wi;
double *xr;
double *xi;
xr=working;
xi=workingi;
n=1<<8;
nv2=n>>1;
nm1=n-1;
j=0;
for(i=0;i<nm1;i++)
...{
if(i<j)...{
tr=xr[j];
ti=xi[j];
xr[j]=xr[i];
xi[j]=xi[i];
xr[i]=tr;
xi[i]=ti;
}
k=nv2;
while(k<=j)...{
j-=k;
k=k>>1;
}
j+=k;
}
for(l=1;l<=8;l++)
...{
le=1<<l;
le1=le/2;
ur=1.;
ui=0.;
wr=cos(pi/le1);
wi=-sin(pi/le1);
for(j=0;j<le1;j++)
...{
for(i=j;i<n;i+=le)
...{
ip=i+le1;
tr=xr[ip]*ur-xi[ip]*ui;
ti=xr[ip]*ui+xi[ip]*ur;
xr[ip]=xr[i]-tr;
xi[ip]=xi[i]-ti;
xr[i]=xr[i]+tr;
xi[i]=xi[i]+ti;
}
tr=ur*wr-ui*wi;
ti=ur*wi+ui*wr;
ur=tr;
ui=ti;
}
}
return;
}
int mfcc::report_begin()
... {
if(Level>THRESHOUD_BEGIN*257)return(1);
else return(0);
}
int mfcc::report_end()
... {
if(Level<THRESHOUD_END*257)return(1);
else return(0);
}
文件"mfcc.h"
#define MFCC_H_XPP
#define X_M_MFCC
#define X_N_MFCC
#define FL_MFCC 0 // low end of the
// lowest filter as a fraction of fs (default = 0)
#define FH_MFCC 0.5 // high end of
// highest filter as a fraction of fs (default = 0.5)
#define FS_MFCC 8000 // sample rate in Hz
#define N_MFCC 256 // length of fft
#define P_MFCC 24 // number of filters in filterbank
#define LEN_PF_MFCC 256
#define pi 3.141592653589
#define SIZE_X_X_MFCC 32
#define SIZE_X_Y_MFCC 140
#define SIZE_DCT_X_MFCC 13
#define SIZE_DCT_Y_MFCC 25
#define INC_MFCC 80
#define THRESHOUD_BEGIN 350
#define THRESHOUD_END 250
class mfcc ... {
private:
void melbank();
double x[SIZE_X_X_MFCC][SIZE_X_Y_MFCC];
double dctcoef[SIZE_DCT_X_MFCC][SIZE_DCT_Y_MFCC];
void calcu_dct();
double hamming[257];
void calcu_hamming();
void cal_stren_win();
double stren_win[13];
int Pos;
double buf[5][25];
double queue[300];
double working[257];
double workingi[257];
void fft();
int qi;
double Level;
public:
double result[25];
void proc();
void set();
void read(short *);
int report_begin();
int report_end();
mfcc();
} ;
#endif
样例程序:
void main( int argc, char * argv[])
... {
mfcc test;
while(1)...{
// 将长度80的语音缓冲区刷新的操作。这里简略。
test.read(SpeechBuffer);//SpeechBuffer是从第0个有效的一段80个语音样点。
//语音样点是短整型16位PCM信号,
test.proc();
/**//* 接下来,对象test提供3个结果如下:
test.result;test.result是从第一个有效的,并非从第0个有效,
//是24个参数,前12个是主参数,后12个是查分参数。
test.report_begin()语音起始标志
test.report_end()语音结束标志。
*/
}
}
来源:
以上程序是我在学(精华区x-3-1-6)课程的
时候做大作业时自己写的一个类。
当时我是根据Matlab中MFCC的源码中翻译成C++的。
在现在的Matlab中没有这个函数,
得要到
http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
上把相应的语音库当下来,
然后,
自己编一个Matlab程序清单如下:
文件"mfcc.m"
function ccc=mfcc(x)
%归一化mel滤波器组系数
bank=melbankm(24,256,8000,0,0.5,'m');
bank=full(bank);
bank=bank/max(bank(:));
%DCT系数,12*24
for k=1:12
n=0:23;
dctcoef(k,:)=cos((2*n+1)*k*pi/(2*24));
end
%归一化倒谱提升窗口
w=1+6*sin(pi*[1:12]./12);
w=w/max(w);
%预加重滤波器
xx=double(x);
xx=filter([1 -1],1,xx);
%语音信号分帧
%xx=enframe(xx,256,80);
xppl=length(xx);
j=1;
for i=65:80:xppl-256,
xx1(j,:)=xx(i:i+256-1)';
j=j+1;
end
xx=xx1;
%计算每帧的MFCC参数
for i=1:size(xx,1)
y=xx(i,:);
s=y'.*hamming(256);
t=abs(fft(s));
t=t.^2;
t=t+2*realmin;
c1=dctcoef*log(bank*t(1:129));
c2=c1.*w';
m(i,:)=c2';
end
%差分参数
dtm=zeros(size(m));
for i=3:size(m,1)-2
dtm(i,:)=-2*m(i-2,:)-m(i-1,:)+m(i+1,:)+2*m(i+2,:);
end
dtm=dtm/3;
%合并mfcc参数和一阶差分mfcc参数
ccc=[m dtm];
%去除首尾两帧,因为这两帧的一阶差分参数为0
ccc=ccc(3:size(m,1)-2,:);
return