之前处理语音信息时,我在网上找过相关资料,但质量参差不齐。这里把自己以前写的 DTW(动态时间规整)算法分享给大家。理论知识可以参考:https://www.cnblogs.com/flypiggy/p/3603192.html
#ifndef DTW_H
#define DTW_H
#include <iostream>
#include <vector>
using namespace std;
/**
 * Returns the smallest of three values.
 *
 * Declared inline because the definition lives in a header: without it,
 * including this header from more than one translation unit produces
 * multiple-definition link errors.
 *
 * @param fdata first candidate
 * @param sdata second candidate
 * @param tdata third candidate
 * @return the minimum of fdata, sdata and tdata
 */
inline double MIN3(double fdata,double sdata,double tdata)
{
    // Reduce the first two candidates, then compare the survivor with the third.
    const double min_of_first_two = fdata > sdata ? sdata : fdata;
    return tdata > min_of_first_two ? min_of_first_two : tdata;
}
/**
 * Computes the dynamic time warping (DTW) cost between two sequences of
 * feature frames.
 *
 * The local cost of pairing frame i of the first sequence with frame j of the
 * second is the sum of squared per-component differences (squared Euclidean
 * distance; no square root is taken). The warping path starts at the pair
 * (0, 0), ends at the pair (fm1_row-1, fm2_row-1), and each step advances the
 * first index, the second index, or both by one. The return value is the
 * smallest accumulated cost over all such paths.
 *
 * @param feature_matr1 first sequence, flattened row-major: frame i occupies
 *                      elements [i*fm1_col, (i+1)*fm1_col)
 * @param fm1_row       number of frames in feature_matr1
 * @param fm1_col       number of components per frame in feature_matr1
 * @param feature_matr2 second sequence, one inner vector per frame
 * @param fm2_row       number of frames in feature_matr2
 * @param fm2_col       number of components per frame in feature_matr2
 * @return the accumulated DTW cost, or -1 when the input is unusable: frame
 *         widths differ, a width is negative, either sequence has no frames,
 *         or a container holds fewer values than its stated dimensions
 */
inline double dtw(const std::vector<double> &feature_matr1,int fm1_row,int fm1_col,const std::vector<std::vector<double> > &feature_matr2,int fm2_row,int fm2_col)
{
    typedef std::vector<double>::size_type index_t;

    // Frames must have the same width, and both sequences need at least one
    // frame; otherwise there is no cell (0, 0) to start the path from.
    if (fm1_col != fm2_col || fm1_col < 0 || fm1_row <= 0 || fm2_row <= 0)
    {
        return -1;
    }

    const index_t rows1 = static_cast<index_t>(fm1_row);
    const index_t rows2 = static_cast<index_t>(fm2_row);
    const index_t cols = static_cast<index_t>(fm1_col);

    // Refuse to read past the end of either container.
    if (feature_matr1.size() < rows1 * cols || feature_matr2.size() < rows2)
    {
        return -1;
    }
    for (index_t j = 0; j < rows2; ++j)
    {
        if (feature_matr2[j].size() < cols)
        {
            return -1;
        }
    }

    // dp[i*rows2 + j] holds the cheapest accumulated cost of any path that
    // ends by pairing frame i of the first sequence with frame j of the second.
    std::vector<double> dp(rows1 * rows2, 0.0);

    for (index_t i = 0; i < rows1; ++i)
    {
        for (index_t j = 0; j < rows2; ++j)
        {
            // Local cost: squared Euclidean distance between the two frames.
            double cost = 0.0;
            for (index_t k = 0; k < cols; ++k)
            {
                const double diff = feature_matr1[i * cols + k] - feature_matr2[j][k];
                cost += diff * diff;
            }

            if (i == 0 && j == 0)
            {
                // Starting cell: nothing has been accumulated yet.
                dp[0] = cost;
            }
            else if (i == 0)
            {
                // First row: only reachable from the cell to the left.
                dp[j] = cost + dp[j - 1];
            }
            else if (j == 0)
            {
                // First column: only reachable from the cell above.
                dp[i * rows2] = cost + dp[(i - 1) * rows2];
            }
            else
            {
                // Interior cell: extend the cheapest of the three predecessors.
                const double up = dp[(i - 1) * rows2 + j];
                const double diag = dp[(i - 1) * rows2 + j - 1];
                const double left = dp[i * rows2 + j - 1];
                const double up_or_diag = up > diag ? diag : up;
                const double best = left > up_or_diag ? up_or_diag : left;
                dp[i * rows2 + j] = cost + best;
            }
        }
    }

    // The path must end at the last frame of both sequences.
    return dp[rows1 * rows2 - 1];
}
#endif // DTW_H