摘自:http://blog.sina.com.cn/s/blog_6a25068b0100m5m6.html
作者:幸福的理由
看了个稀里糊涂,一知半解,觉得以后又会忘记。珍藏下来。边理解边添加。
以下是mfcc_float.cpp的内容
#include "stdlib.h"
#include "stdio.h"
#include "math.h"
#include "MFCC_float.h"
// Numeric constants of the MFCC front end.
// Each macro sits on its own line: a #define runs to the end of the line, so
// several of them on one line would end up inside the first macro's body.
#define PI 3.14159265358979      // used by the Hamming window, FFT twiddles and DCT
#define PRE_EMP_FACTOR 0.98      // pre-emphasis coefficient (copied into CMFCC::Alfa)
#define ENE_FLOOR 1.0            // lower bound applied to a filterbank output before log()
//------------------------------------------------------------------------------------- //public functions
//constructor CMFCC::CMFCC(long aInfo, int aSampleRate, int aN, int aFl, int aFh, int aM, int aTypeOfFilterBank, int aP) { //general Info=aInfo; SampleRate=aSampleRate; //sample rate N=aN; M=aM; P=aP; //fft point number, filterbanks number, cepstrum order Fl=aFl; Fh=aFh; //the lowest and highest frequency (Hz) of all the filterbanks //pre-emphasize Alfa=PRE_EMP_FACTOR; xtmp=new double[N];
//FFT x=new COMPLEX[N]; X=new double[N]; HammingWin=NewHammingWin(N);
//triangular filterbanks if(aTypeOfFilterBank==MEL_SCALE) NewMelFilterBanks(SampleRate, Fl, Fh, N, M, FilterBanks, FilterBanksL, FilterBanksH); else NewLinFilterBanks(SampleRate, Fl, Fh, N, M, FilterBanks, FilterBanksL, FilterBanksH);
S=new double[M];
//DCT DctMatrix=NewDctMatrix(P,M);
//feature temp buffer pfFea=new double[P]; }
//destructor CMFCC::~CMFCC(void) { delete xtmp; delete x; delete X; delete HammingWin; delete FilterBanks[0]; delete FilterBanks; delete FilterBanksL; delete FilterBanksH; delete S; delete DctMatrix[0]; delete DctMatrix; delete pfFea; }
//print void CMFCC::Print(void) { int i,j; FILE *fp;
fp=fopen("HammingWin.txt", "wt"); for(i=0 ; i<N ; i++) fprintf(fp, "%.6f\n", HammingWin[i]); fclose(fp);
fp=fopen("FilterBanks.txt", "wt"); for(j=0 ; j<N/2 ; j++) { for(i=0 ; i<M ; i++) fprintf(fp, ".6f ", FilterBanks[i][j]); fprintf(fp, "\n"); } fclose(fp);
fp=fopen("FilterBanksLH.txt", "wt"); for(i=0 ; i<M ; i++) fprintf(fp, "%d\t%d\t%d\n", FilterBanksL[i], FilterBanksH[i], (FilterBanksH[i]-FilterBanksL[i]+1)); fclose(fp);
fp=fopen("DCTMatrix.txt", "wt"); for(i=0 ; i<P ; i++) { for(j=0 ; j<M ; j++) fprintf(fp, "%.6f\t", DctMatrix[i][j]); fprintf(fp, "\n"); } fclose(fp);
return; }
//extract one sentence of MFCC/LFCC int CMFCC::MFCCbySentence(short *pnWav, int SampleCount, float *&pfMFCC, int &FrmCount, int &Dim, int FrmWidth, int FrmOffst) { int i; if(FrmWidth>N || FrmOffst>FrmWidth) return FAILURE;
//compute frame count & dim of feature vector & allocate memory space FrmCount=(SampleCount-FrmWidth)/FrmOffst+1; if(FrmCount<=0) return FAILURE; if(Info&DCEPS) { if(FrmCount<=10) return FAILURE; }
Dim=P; if(Info&DCEPS) Dim+=P; if(Info&DDCEPS) Dim+=P; pfMFCC = new float[FrmCount*Dim];
//remove DC component RemoveDC(pnWav, SampleCount);
//extract one sentence of MFCC for(i=0 ; i<FrmCount ; i++) { MFCCbyFrame(&pnWav[i*FrmOffst], FrmWidth, &pfMFCC[i*Dim], P); }
//CMS if(Info&CMS) { CepsMeanSub(pfMFCC, FrmCount, Dim, P); }
//extract one sentence of DMFCC if(Info&DCEPS) { DMFCCbyFrame(&pfMFCC[0], &pfMFCC[0], &pfMFCC[0], &pfMFCC[1*Dim], &pfMFCC[2*Dim], &pfMFCC[P], P); DMFCCbyFrame(&pfMFCC[0], &pfMFCC[0], &pfMFCC[1*Dim], &pfMFCC[2*Dim], &pfMFCC[3*Dim], &pfMFCC[1*Dim+P], P); for(i=2 ; i<=FrmCount-3 ; i++) DMFCCbyFrame(&pfMFCC[(i-2)*Dim], &pfMFCC[(i-1)*Dim], &pfMFCC[i*Dim], &pfMFCC[(i+1)*Dim], &pfMFCC[(i+2)*Dim], &pfMFCC[i*Dim+P], P); DMFCCbyFrame(&pfMFCC[(FrmCount-4)*Dim], &pfMFCC[(FrmCount-3)*Dim], &pfMFCC[(FrmCount-2)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-2)*Dim+P], P); DMFCCbyFrame(&pfMFCC[(FrmCount-3)*Dim], &pfMFCC[(FrmCount-2)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim], &pfMFCC[(FrmCount-1)*Dim+P], P); }
//extract one sentence of DDMFCC if(Info&DDCEPS) { for(i=0 ; i<P ; i++) { pfMFCC[2*P+i] = pfMFCC[(FrmCount-1)*Dim+2*P+i] = 0; }
for(i=1 ; i<=FrmCount-2 ; i++) { DDMFCCbyFrame(&pfMFCC[(i-1)*Dim+P], &pfMFCC[i*Dim+P], &pfMFCC[(i+1)*Dim+P], &pfMFCC[i*Dim+2*P], P); } }
return SUCCESS; }
//compute one frame of MFCC/LFCC int CMFCC::MFCCbyFrame(short *pnWav, int FrmWid, float *pfMFCC, int CepsOrder) { int i;
if(FrmWid>N || CepsOrder!=P) return FAILURE;
//pre-emphasize 预加重 PreEmp(pnWav, FrmWid, xtmp, N, Alfa);
//apply hamming window 加窗(窗宽度, ApplyWindow(xtmp, HammingWin, N, xtmp); //FFT 做fft for(i=0 ; i<N ; i++) { x[i].real=xtmp[i]; //实部,虚部?? x[i].image=0; } FFTAmp(x, X, N);
//apply triangular windows & ln ApplyFilterBanks(X, N, S, M, FilterBanks, FilterBanksL, FilterBanksH);
//DCT ApplyDCT(S, M, pfFea, P, DctMatrix);
//double -> float for(i=0 ; i<CepsOrder ; i++) pfMFCC[i] = (float)pfFea[i]; return SUCCESS; }
//extract one frame of DMFCC void CMFCC::DMFCCbyFrame(float *Prev2Fea, float *Prev1Fea, float *CurrFea, float *Next1Fea, float *Next2Fea, float *DFea, int P) { int i;
for(i=0 ; i<P ; i++) DFea[i] = (-0.632456f*Prev2Fea[i]-0.316228f*Prev1Fea[i]+ \ 0.316228f*Next1Fea[i]+0.632456f*Next2Fea[i]);
return; }
//extract one frame of DDMFCC void CMFCC::DDMFCCbyFrame(float *PrevDFea, float *CurrDFea, float *NextDFea, float *DDFea, int P) { int i;
for(i=0 ; i<P ; i++) DDFea[i] = 0.7071f*(NextDFea[i]-PrevDFea[i]);
return; }
//------------------------------------------------------------------------------------- //private functions
//====== Pre-emphasize ======// void CMFCC::PreEmp(short *pnWav, int FrmWid, double *x, int N, double Alfa) { int i; int s=(N-FrmWid)/2; //start position of data in COMPLEX x[N] s=(512-帧宽)/2
//clear buffer for(i=0 ; i<N ; i++) { x[i]=0; }
//compute the first sample x[s] = pnWav[0]-Alfa*pnWav[0]; //y(n)=x(0)-a*x(0); 第一个点
//compute the following samples in one frame for(i=1 ; i<FrmWid ; i++) x[i+s] = pnWav[i]-Alfa*pnWav[i-1]; //每个帧宽中的余下点,x(i+s)=后一个wav-前一个wav*a
return; }
//====== Hamming window ======// //generate a hamming window double* CMFCC::NewHammingWin(int Len) { double* Win=new double[Len]; for(int i=0; i<Len; i++) Win[i]=(0.54-0.46*cos(2*PI*i/Len)); return Win; }
//applying a window void CMFCC::ApplyWindow(double *In, double *Win, int Len, double *Out) { for(int i=0; i<Len; i++) Out[i]=In[i]*Win[i]; //对于音频,乘以窗函数 };
//====== FFT ======// //Func: FFT 2DIT //In: COMPLEX *input.real; //one frame of wave data // int n; //Point number of FFT //Out: COMPLEX *input; //real part and image part after FFT 实部和虚部 void FFT(COMPLEX *input, int n) { int x, i, nv2, j, k, le, l, le1, ip, nm1; COMPLEX t, u, w; //复数
int ntemp=n; for(x=0; n>1; x++) n/=2; n=ntemp;
nv2=n/2; nm1=n-1; j=1;
for(i=1; i<=nm1; i++) { if(i<j) //i是前一个点,j是后一个点 { t.real = input[i-1].real; t.image = input[i-1].image; input[i-1].real = input[j-1].real; input[i-1].image = input[j-1].image; input[j-1].real = t.real; input[j-1].image = t.image; //后一个点被前一个点代替 }
k=nv2;
while(k<j) { j-=k; k/=2; } j+=k; }
le=1; for(l=1; l<=x; l++) { le*=2; le1=le/2; u.real = 1.0f; u.image = 0.0f; w.real = (float)cos(PI/le1); w.image = (float)-sin(PI/le1);
for(j=1; j<=le1; j++) { for(i=j; i<=n; i+=le) { ip = i+le1; t.real = input[ip-1].real*u.real-input[ip-1].image*u.image; t.image = input[ip-1].real*u.image+input[ip-1].image*u.real; input[ip-1].real = input[i-1].real-t.real; input[ip-1].image = input[i-1].image-t.image; input[i-1].real = t.real+input[i-1].real; input[i-1].image = t.image+input[i-1].image; }
t.real = u.real*w.real-u.image*w.image; t.image = u.image*w.real+u.real*w.image; u.real = t.real; u.image = t.image; } }
return; }
//Func: FFTAmp, compute the amplitude of spectrum 急速频率谱的幅度 //In: COMPLEX *x.real; //one frame of wave data 一帧的wav // int N; //point number of FFT 做fft的点数 //Out: double *Amp; //spectral amplitude of this frame 这一帧的谱幅度 void CMFCC::FFTAmp(COMPLEX *x, double *Amp, int N) //实部和虚部的平方和开方,求模 { //FFT FFT(x, N);
//sqrt 开方 for(int i=0 ; i<=N/2 ; i++) Amp[i]=sqrt(x[i].real*x[i].real + x[i].image*x[i].image);
return; }
//====== DCT ======// //generate the matrix of DCT, excluding 0'th dimension double** CMFCC::NewDctMatrix(int nP, int nM) { int p,m; double **Matrix=new double*[nP]; Matrix[0]=new double[nP*nM]; for(p=1 ; p<nP ; p++) Matrix[p]=&Matrix[0][p*nM];
for(p=0 ; p<nP ; p++) { for(m=0 ; m<nM ; m++) { Matrix[p][m]=cos(PI*p*(m+0.5)/nM); //including c0 // Matrix[p][m]=cos(PI*(p+1)*(m+0.5)/nM); //excluding c0 } }
return Matrix; }
//applying a transformation matrix 逆傅里叶变换 void CMFCC::ApplyDCT(double *In, int InDim, double *Out, int OutDim, double **Matrix) { int p,m;
for(p=0 ; p<OutDim ; p++) { Out[p]=0; for(m=0 ; m<InDim ; m++) { Out[p]+=In[m]*Matrix[p][m]; } }
return; }
//====== Generating Filterbanks on Mel-Scale & Linear-Scale ======//
//FFT point to frequency in Hz
// Returns (n+0.5)*Fs/N for n in [0, N); any other n yields 0.
double n2f(int n, int Fs, int N)
{
    const bool inRange = !(n<0 || n>=N);
    if(!inRange)
        return 0;

    return (n+0.5)*Fs/N;
}
//frequency in Hz to FFT point
// Returns f*N/Fs rounded to the nearest integer (add 0.5, truncate) for f in
// [0, Fs]; frequencies outside that range yield 0.
int f2n(double f, int Fs, int N)
{
    if(f<0)
        return 0;
    if(f>Fs)
        return 0;

    const double point = f*N/Fs+0.5;
    return (int)point;
}
//frequency in Hz to bark
// Returns 1125*ln(1+f/700) for f >= 0 and 0 for negative f.
// NOTE(review): the name says "bark", but this is the formula usually quoted
// for the mel scale, and the result is used to place the mel filterbanks.
double f2b(double f)
{
    return (f<0) ? 0 : 1125*log(1+f/700.0);
}
//bark to frequency in Hz
// Inverse of f2b: returns 700*(exp(b/1125)-1) for b >= 0 and 0 for negative b.
double b2f(double b)
{
    return (b<0) ? 0 : 700*(exp(b/1125)-1);
}
//compute the central point of each filterbank on mel-scale //for M filterbanks, the central point shoule be extended to M+2 int* MelCentralPoint(int Fl, int Fh, int M, int Fs, int N) { int m; double F; int *CentralPoint=new int[M+2]; for(m=0 ; m<=M+1 ; m++) { //central frequency 中心频率,高-低加低,乘以m数/点数 F=b2f(f2b(Fl)+(f2b(Fh)-f2b(Fl))*m/(M+1)); //central point CentralPoint[m]=f2n(F, Fs, N); } return CentralPoint; }
//compute the central point of each filterbank on linear-scale //for M filterbanks, the central frequency shoule be extended to M+2 int* LinCentralPoint(int Fl, int Fh, int M, int Fs, int N) { int m; double F; int *CentralPoint=new int[M+2]; for(m=0 ; m<=M+1 ; m++) { //central frequency F=Fl+(Fh-Fl)*m*1.0/(M+1); //central point CentralPoint[m]=f2n(F, Fs, N); } return CentralPoint; }
//generate the one filter
// Fills FilterBank[ns..ne] (both ends inclusive) with one triangular filter:
//   [ns,  nm1)  0
//   [nm1, nm2)  rising edge  (n-nm1)/(nm2-nm1)
//   [nm2, nm3)  falling edge (nm3-n)/(nm3-nm2), equal to 1 at n==nm2
//   [nm3, ne]   0
// The callers in this file pass ne = N-1, i.e. the last valid index of the
// row. The tail loop therefore has to include ne; treating it as exclusive
// left the final weight uninitialised.
void GenTriangularFilter(double *FilterBank, int ns, int nm1, int nm2, int nm3, int ne)
{
    int n;

    for(n=ns ; n<nm1 ; n++)
        FilterBank[n]=0;
    //an empty edge (nm1==nm2 or nm2==nm3) skips its loop, so no division by zero
    for(n=nm1 ; n<nm2 ; n++)
        FilterBank[n]=1.0*(n-nm1)/(nm2-nm1);
    for(n=nm2 ; n<nm3 ; n++)
        FilterBank[n]=1.0*(nm3-n)/(nm3-nm2);
    for(n=nm3 ; n<=ne ; n++)
        FilterBank[n]=0;

    return;
}
//Generate a set of mel-scale filterbanks 一个mel滤波器 //In: int Fs; //sample rate采样率 // int Fl; //the lowest frequency of all the filterbanks最低频率 // int Fh; //the highest frequency of all the filterbanks // int N; //FFT point fft点数 // int M; //number of filterbanks 滤波器的个数 //Out: double **FilterBanks; //Triangular filters 三角滤波器 // int *FilterBanksL; //Lower point number of filters 低一级的滤波器 // int *FilterBanksH; //Higher point number of filters 高一级的滤波器 void CMFCC::NewMelFilterBanks(int Fs, int Fl, int Fh, int N, int M, double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH) { int m;
//allocate memory space for filters 为滤波器分配空间 FilterBanksL=new int[M]; FilterBanksH=new int[M]; FilterBanks=new double*[M]; FilterBanks[0]=new double[M*N]; for(m=1 ; m<M ; m++) FilterBanks[m]=&FilterBanks[0][m*N]; //每个滤波器做傅里叶变换的点数是n
//compute the central points of filterbanks, totall M+2 points 计算滤波器的中心点,m个滤波器,是m+2个点 int *MelCenPoints=MelCentralPoint(Fl,Fh,M,Fs,N); for(m=0 ; m<M ; m++) { FilterBanksL[m]=MelCenPoints[m]; FilterBanksH[m]=MelCenPoints[m+2]; GenTriangularFilter(FilterBanks[m], 0, MelCenPoints[m], MelCenPoints[m+1], MelCenPoints[m+2], N-1); }
delete MelCenPoints;
return; }
//Generate a set of linear-scale filterbanks void CMFCC::NewLinFilterBanks(int Fs, int Fl, int Fh, int N, int M, double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH) { int m;
//allocate memory space for filters FilterBanksL=new int[M]; FilterBanksH=new int[M]; FilterBanks=new double*[M]; FilterBanks[0]=new double[M*N]; for(m=1 ; m<M ; m++) FilterBanks[m]=&FilterBanks[0][m*N];
//compute the central points of filterbanks, totall M+2 points int *LinCenPoints=LinCentralPoint(Fl,Fh,M,Fs,N); for(m=0 ; m<M ; m++) { FilterBanksL[m]=LinCenPoints[m]; FilterBanksH[m]=LinCenPoints[m+2]; GenTriangularFilter(FilterBanks[m], 0, LinCenPoints[m], LinCenPoints[m+1], LinCenPoints[m+2], N-1); }
delete LinCenPoints;
return; }
//====== Applying Filterbanks & Log Compression ======// 加滤波器,对数化 void CMFCC::ApplyFilterBanks(double *X, int N, double *S, int M, double **FilterBanks, int *FilterBanksL, int *FilterBanksH) { int m,n; int nl, nh;
for(m=0 ; m<M ; m++) { //get the lowest and the highest point number of mth filterbank 得到第m个滤波器的最低和最高点 nl=FilterBanksL[m]; nh=FilterBanksH[m];
//error if(nl<0 || nh>=N || nl>=nh) { S[m]=0; continue; }
//filter the signal with mth filterbank S[m]=0; for(n=nl ; n<=nh ; n++) S[m]+=X[n]*FilterBanks[m][n];
//log if(S[m]<ENE_FLOOR) S[m]=ENE_FLOOR; S[m]=log(S[m]); }
return; }
//====== CMS by Sentence ======// 得到均值 void CMFCC::CepsMeanSub(float *pfMFCC, int FrmCount, int Dim, int CepsOrder) { int i,j;
float *Mean=new float[CepsOrder];
for(i=0 ; i<CepsOrder ; i++) { Mean[i]=0; for(j=0 ; j<FrmCount ; j++) Mean[i]+=pfMFCC[j*Dim+i]; Mean[i]/=FrmCount; }
for(i=0 ; i<CepsOrder ; i++) for(j=0 ; j<FrmCount ; j++) pfMFCC[j*Dim+i]-=Mean[i];
delete Mean;
return; }
//====== Remove DC component ======// void CMFCC::RemoveDC(short *pnWav, int SampleCount) { if(pnWav==NULL || SampleCount<=0) return;
int i; float sum=0, wav;
for (i=0 ; i<SampleCount ; i++) { sum += pnWav[i]; } sum /= SampleCount;
for (i=0 ; i<SampleCount ; i++) { wav = pnWav[i]-sum; pnWav[i] = (short)wav; }
return; }
以下是mfcc_float.h的内容
由此可看出,本程序是mfcc特征和差分mfcc的结合,先提取mfcc,再对提取到的特征向量做差分,进一步剥离特征,目的是为了得到说话人言语和速度的变化啊啥的巴拉巴拉。看了一遍程序,总容易各种分心,太多不知道所以看得很晕。这个盒子,总算打开了。
首先,是预加重,是说为了减少嘴唇和声带的效应影响,加重高频部分,理解来就是消除唇部的摩擦,对真实的语音的频率做补偿。方法是y(n)=x(n)-a*x(n-1)。这里a是预加重系数(即程序中的Alfa),在0.9到1之间,一般取0.98(程序中的PRE_EMP_FACTOR)。
其次是加窗分帧。加窗,又涉及到语音的特征巴拉巴拉。语音在长范围内是不停变动的,没有固定的特性无法做处理,所以加个窗,窗外的值设定为0,这样就把要处理的部分固定在窗内,做处理。这样用窗把信号分成一个个分析帧,一般取10-30ms做为窗长;为了避免窗边界对信号的遗漏,对帧做偏移时要有帧叠(相邻帧重叠),所以一般取帧长的一半作为帧移。加的窗是汉明窗。公式是在加窗范围内,
w(n)=0.54-0.46*cos(2*pi*n/(N-1)),0≤n≤N-1,其中N为窗长(程序中NewHammingWin的分母用的是Len而不是Len-1)。用汉明窗是因为在时域是信号乘以窗函数,它两端平滑衰减(到0.08左右),不像矩形窗变化剧烈,而在频域它能保留高频成分,保留细节信息。
再次就是提取特征了。步骤为先离散傅里叶变换,将信号变换到频域上。这里涉及到dft变换的过程,是实部虚部分别作处理。程序中DFT一段需要细细研究,没看懂。
离散傅里叶变换后得到信号的频谱,然后对它的幅度做平方,就是能量谱。
啥是能量谱?
在这一步步的追溯到太多问题了,查阅资料也各种明白了点,但更多的是一知半解。收集以后一点点明白。http://longer.spaces.eepw.com.cn/articles/article/item/71979
【对于能量信号,常用能量谱来描述。所谓的能量谱,也称为能量谱密度,是指用密度的概念表示信号能量在各频率点的分布情况。也即是说,对能量谱在频域上积分就可以得到信号的能量。能量谱是信号幅度谱的模的平方,其量纲是焦/赫。对于功率信号,常用功率谱来描述。所谓的功率谱,也称为功率谱密度,是指用密度的概念表示信号功率在各频率点的分布情况。也就是说,对功率谱在频域上积分就可以得到信号的功率。从理论上来说,功率谱是信号自相关函数的傅里叶变换。因为功率信号不满足傅里叶变换的条件,其频谱通常不存在,维纳-辛钦定理证明了自相关函数和傅里叶变换之间对应关系。在工程实际中,即便是功率信号,由于持续的时间有限,可以直接对信号进行傅里叶变换,然后对得到的幅度谱的模求平方,再除以持续时间来估计信号的功率谱。】 又学习了。
啥是能量信号?
【当且仅当f(t)在所有时间上的能量不为0且有限时,该信号为能量信号,即(1)式中的 T 趋于无穷大的时候E为有限。典型的能量信号如方波信号、三角波信号等。但是有些信号不满足能量信号的条件,如周期信号和能量无限的随机信号,此时就需要用功率来描述这类信号。当且仅当x(t)在所有时间上的功率不为0且有限时,该信号为功率信号】一般来说,周期信号和随机信号是功率信号,而非周期的确定信号是能量信号。
因为语音信号是非周期的,且能量有限,所以它是能量信号。
啥是能量?啥是功率?
功率是单位时间做的功,单位时间的能量,能量是连续时间内的功,能量就是对功率在一段时间内做积分。
再回去,得到能量谱后,用一组mel尺度的三角形滤波器组对能量谱做滤波。滤波器的个数为24-40个。 每个滤波器有个中心频率。m个数小的时候他们间隔小,m多的时候间隔大。他们有交叠,当前滤波器的中心频率是上一个的最高频率,也是下一个的最低频率。
这样得到每个滤波器的输出,滤波器的传递函数有公式。因为滤波器是按mel尺度划分的,而FFT的频点是线性频率(Hz),两者需要相互换算,所以程序里有f2b(Hz转mel)和b2f(mel转Hz)这两个函数。
每个滤波器的中心频率在mel尺度上是等间隔的(换回线性频率后低频密、高频疏)。用这些滤波器来模拟人的听觉特性,将线性频率转化为mel频率的公式是B(f)=1125ln(1+f/700)。这样,就可以计算出对应的mel频率滤波器界限,进而得到滤波器函数,乘以能量谱,再取对数就是滤波器的输出。
然后,对输出做dct变换,是离散余弦变换。
啥是离散余弦变换?dct?
看了百度,有点理解。说是形成一个dct矩阵,左上角是重要部分,右下角是非重要部分,甚至可以抛弃,这样对压缩很有用,捡重要部分来压缩。矩阵的求解有公式。
自此,mfcc特征提取完成了。
还有很多原理上的,编程上的疑问再解决。
问:傅里叶变换过程,滤波器,数字信号处理。。很多流程。。明天理解。
//---------------------------------------------------------------------------------------- #ifndef __MFCCH_FLOAT_H__ #define __MFCCH_FLOAT_H__
// Each macro sits on its own line: a #define runs to the end of the line, and
// anything after "//" is a comment, so macros sharing a line (or following a
// comment on the same line) are not defined as intended.

// Return codes of MFCCbySentence / MFCCbyFrame
#define SUCCESS 1
#define FAILURE 0

//----------------------------------------------------------------------------------------
// Filterbank type (constructor argument aTypeOfFilterBank)
#define MEL_SCALE 0
#define LIN_SCALE 1

// Bit flags for `long Info`
#define CMS 0x0001      //apply cepstral mean subtraction
#define DCEPS 0x0002    //append delta cepstrum
#define DDCEPS 0x0004   //append delta-delta cepstrum
//complex structure
// One complex number as used by the FFT: real part and imaginary part.
struct COMPLEX
{
    double real;    //real part
    double image;   //imaginary part
};
//MFCC extraction class MFCC提取类 class CMFCC { public: //采样率16KHZ,频率为从100到7000,24个滤波器,其余参数不解其意??? //constructor, allocate memory & generate pre-set parameters 构造函数,初始参数 CMFCC(long aInfo=7, int aSampleRate=16000, int aN=512, int aFl=100, int aFh=7000, int aM=24, int aTypeOfFilterBank=0, int aP=12); //destructor, release memory space 析构函数,释放内存 ~CMFCC(void); //print void Print(void);
//Func: extract one sentence of MFCC int MFCCbySentence(short *pnWav, int SampleCount, float *&pfMFCC, int &FrmCount, int &Dim, int FrmWidth=512, int FrmOffst=256);
//Func: extract one frame of MFCC //In: short *pnWav; //one frame of wave data // int FrmWid; //frame width //Out: float *pfMFCC; //one frame of MFCC // int CepsOrder; //order of cepstrum //Ret: flag of SUCCESS or FAILURE int MFCCbyFrame(short *pnWav, int FrmWid, float *pfMFCC, int CepsOrder); //Func: extract one frame of DMFCC //In: float *Prev2Fea; //feature vector of previous second frame // float *Prev1Fea; //feature vector of previous first frame // float *CurrFea; //feature vector of current frame // float *Next1Fea; //feature vector of next first frame // float *Next2Fea; //feature vector of next second frame // int P; //order of delta cepstrum //Out: float *DFea; //delta cepstrum of current frame void DMFCCbyFrame(float *Prev2Fea, float *Prev1Fea, float *CurrFea, float *Next1Fea, float *Next2Fea, float *DFea, int P);
//Func: extract one frame of DDMFCC //In: float *PrevDFea; //delta feature vector of previous frame // float *CurrDFea; //delta feature vector of current frame // float *NextDFea; //delta feature vector of next frame // int P; //order of delta delta cepstrum //Out: float *DDFea; //delta delta cepstrum of current frame void DDMFCCbyFrame(float *PrevDFea, float *CurrDFea, float *NextDFea, float *DDFea, int P);
private: //--- general parameters ---// long Info; int SampleRate; //sample rate int N, P; //point number of FFT, order of cepstrum
//--- pre-emphasize ---// double Alfa; //factor of pre-emphasize double *xtmp;//double xtmp[N], temp buffer for x[N] //pre-emphasize void PreEmp(short *pnWav, int FrmWid, double *x, int N, double Alfa);
//--- FFT ---// double *HammingWin; //double HammingWin[N], weights of hamming window
//generate the weights of a hamming window double* NewHammingWin(int Len); //apply a hamming window void ApplyWindow(double *In, double *Win, int Len, double *Out);
COMPLEX *x; //COMPLEX x[N], x.real is the input for FFT double *X; //double X[N], spectral amplitude of this frame, only [0,N/2) is efficient
//compute spectral amplitude void FFTAmp(COMPLEX *x, double *Amp, int N);
//--- triangular filterbanks & ln ---// int M; //number of triangular filters int Fl, Fh; //lowest and highest frequencies of all the filterbank, in Hz double *S; //double S[M], the output of filterbanks
double **FilterBanks; //double FilterBanks[M][N], the weights of each point and each filterbanks int *FilterBanksL; //int FilterBanksL[M], the lower point of each filterbanks int *FilterBanksH; //int FilterBanksH[M], the higher point of each filterbanks
//generate a set of filterbanks on mel-scale mel频率上 void NewMelFilterBanks(int Fs, int Fl, int Fh, int N, int M, double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH); //generate a set of filterbanks on linear-scale 线性频率上 void NewLinFilterBanks(int Fs, int Fl, int Fh, int N, int M, double **&FilterBanks, int *&FilterBanksL, int *&FilterBanksH); //apply triangular windows & ln void ApplyFilterBanks(double *X, int N, double *S, int M, double **FilterBanks, int *FilterBanksL, int *FilterBanksH);
//--- DCT ---// 离散余弦变换 double **DctMatrix; //double DctMatrix[P][M], the matrix of DCT
//generate the transform matrix of DCT double** NewDctMatrix(int nP, int nM); //DCT dct矩阵 void ApplyDCT(double *In, int InDim, double *Out, int OutDim, double **Matrix);
//--- CMS ---// //Cepstrum Mean Subtraction 差分倒谱系数的提取 void CepsMeanSub(float *pfMFCC, int FrmCount, int Dim, int CepsOrder);
//--- remove DC component ---// void RemoveDC(short *pnWav, int SampleCount);
//--- Operations on a whole sentence ---// double *pfFea; //double pfFea[P]; feature temp buffer };
//---------------------------------------------------------------------------------------- #endif
感言:果然过了很多年, 才慢慢觉得信号处理是件有意思的事情了