前言
这几天项目上要用到波形匹配并且计算匹配度,由于以前没有相关经验,遂找了一大堆声纹识别的算法(MFCC、DTW等)来看,发现和我的需求有点不一样,偶然看到一篇讲用欧氏距离进行图像识别匹配的博文,觉得这个才比较像我的需求,遂写了工程来验证,发现效果果然不错,写下来分享并作笔记。
1、欧几里得距离算法
欧氏距离算法原理其实很简单,用一个公式就能讲清楚了
d(x,y)=(√∑ni=1(xi−yi)2)
d
(
x
,
y
)
=
(
∑
i
=
1
n
(
x
i
−
y
i
)
2
)
用代码实现起来也很简单,如下
//typedef double T;
typedef int T;
class Distance
{
public:
Distance(int m, T array1[], T array2[]);
void distance(T a[], T b[]);
void set(T array1[], T array2[]);
double get()
{
return Result;
}
~Distance() {}
private:
int n;
T* a;//declare two objects
T* b;
T square;
double Result;// The result of Euclid's distanst
};
Distance::Distance(int m, T array1[], T array2[])
{
n = m;
set(array1, array2);
distance(a, b);
}
void Distance::set(T array1[], T array2[])
{
a = array1;
b = array2;
}
void Distance::distance(T a[], T b[])
{
square = 0;
for(int i = 0; i < n; i++)
{
square += (a[i] - b[i]) * (a[i] - b[i]);//计算欧几里得距离
}
Result = sqrt(square);
}
算法的直观原理说起来也让人很好接受,以三维空间为例,将两个点放进三维空间里,用这两个点的坐标根据欧几里得距离公式算出两点的距离,距离越近说明两点的三个坐标越相近。欧几里得距离是可以算n维的
这样我们在对比两个数组是否相似时就可以以此类推了,将两个数组的每个元素作为一个维度的坐标,将两个数组压缩成为n维空间中的两个小点(佛曰:芥子须弥。哈哈哈),看在这芥子空间中这两个点是不是挨得够近,如果够近,就说明两个数组相似度比较好。如果是在一长段波形a中查找是否有跟待查波形b相似的波形,那么只要不断地取出a中所有跟b等长的波形,压缩成好多点,把这些点与b压缩成的点分别求欧氏距离,哪个更近哪个匹配度就更高啦。
2、上代码
我的代码用■方块在控制台随机生成一个长波形,然后截取其中一段,来查找这段波形在长波形中的位置。为了体现出欧氏距离的牛逼,在截取完之后,用随机毛刺和形变分别处理原始的长波形(本来还有幅值的,觉得项目中用不到没加)来干扰查找。最后根据欧氏距离最小找到待查波形是从哪里截取出来的并和原始波形进行对比。
#include<iostream>
#include<stdio.h>
#include<vector>
#include<cmath>
#include <cstdio>
#include <iomanip>
using namespace std;
//用欧几里得距离计算波形匹配度
///
//typedef double T;
typedef int T;
class Distance
{
public:
Distance(int m, T array1[], T array2[]);
void distance(T a[], T b[]);
void set(T array1[], T array2[]);
double get()
{
return Result;
}
~Distance() {}
private:
int n;
T* a;//declare two objects
T* b;
T square;
double Result;// The result of Euclid's distanst
};
Distance::Distance(int m, T array1[], T array2[])
{
n = m;
set(array1, array2);
distance(a, b);
}
void Distance::set(T array1[], T array2[])
{
a = array1;
b = array2;
}
void Distance::distance(T a[], T b[])
{
square = 0;
for(int i = 0; i < n; i++)
{
square += (a[i] - b[i]) * (a[i] - b[i]);
}
Result = sqrt(square);
}
///
int main()
{
///一、生成总波形
int nTotalLen;
cout << "输入随机波形长度:" ;
cin >> nTotalLen;
int *nTotalArray = new int[nTotalLen];
int *nTotalArray_bkp = new int[nTotalLen];
for(int i = 0; i < nTotalLen; i++)
{
nTotalArray[i] = rand() / (RAND_MAX / 32);
}
cout << "模拟输出波形:" << endl;
for(int i = 0; i < nTotalLen; i++)
{
cout << setfill(' ') << setw(4) << i;
for(int nCubeLen = 0; nCubeLen < nTotalArray[i]; nCubeLen++)
{
cout << "■";
}
cout << endl;
nTotalArray_bkp[i] = nTotalArray[i]; //备份总波形
}
cout << endl << endl << endl;
///二、在总波形中截取一段作为需要查找的波形
int nStartPos, nCutLen;
cout << "输入截取波形起始点:" ;
cin >> nStartPos;
cout << "输入截取波形长度:" ;
cin >> nCutLen;
while(nCutLen > nTotalLen - nStartPos)
{
cout << "截取波形超长,重新输入截取波形长度:" ;
cin >> nCutLen;
}
cout << "模拟截取的波形:" << endl;
int *nCutArray = new int[nCutLen];
for(int i = 0; i < nCutLen; i++)
{
nCutArray[i] = nTotalArray[i + nStartPos];
cout << setfill(' ') << setw(4) << i;
for(int nCubeLen = 0; nCubeLen < nCutArray[i]; nCubeLen++)
{
cout << "■";
}
cout << endl;
}
cout << endl << endl << endl;
///三、为总波形添加干扰
//1.添加毛刺
int nHarmonicNum;
cout << "输入毛刺数量:" ;
cin >> nHarmonicNum;
while(nHarmonicNum > nTotalLen)
{
cout << "毛刺数量比总长还多,请重输:" ;
cin >> nHarmonicNum;
}
int *nHarmonic = new int[nHarmonicNum];
int nHarmonicStrength;
cout << "输入毛刺强度:" ;
cin >> nHarmonicStrength;
while(nHarmonicStrength > 32)
{
cout << "毛刺强度超过原赋值强度,可能不准,请重输:" ;
cin >> nHarmonicStrength;
}
for(int i = 0; i < nHarmonicNum; i++)//生成随机毛刺
{
nHarmonic[i] = rand() / (RAND_MAX / nHarmonicStrength);
}
int *nHarmonicPos = new int[nHarmonicNum];
for(int i = 0; i < nHarmonicNum; i++)//生成随机毛刺位置
{
nHarmonicPos[i] = rand() / (RAND_MAX / nTotalLen);
}
for(int i = 0; i < nHarmonicNum; i++)//添加毛刺
{
nTotalArray[nHarmonicPos[i]] += nHarmonic[i];
}
char chQuestion1;
cout << "需要输出干扰后波形吗(y/n):" ;
cin >> chQuestion1;
if(chQuestion1 == 'y')
{
for(int i = 0; i < nTotalLen; i++)
{
cout << setfill(' ') << setw(4) << i;
for(int nCubeLen = 0; nCubeLen < nTotalArray[i]; nCubeLen++)
{
cout << "■";
}
for(int nBlankLen = 0; nBlankLen < 40 - nTotalArray[i]; nBlankLen++)
{
cout << " ";
}
for(int nCubeLen = 0; nCubeLen < nTotalArray_bkp[i]; nCubeLen++)
{
cout << "■";
}
cout << endl;
}
}
cout << endl << endl << endl;
Distance distance(nTotalLen, nTotalArray, nTotalArray_bkp);
cout << "添加毛刺干扰后总波形与原来的欧几里得距离为:" << distance.get() << endl ;
cout << endl << endl << endl;
//2.添加比例形变
double dScale;
cout << "输入形变强度:" ;
cin >> dScale;
while(dScale > 5 || dScale < 0)
{
cout << "形变大于5或者小于0可能不准,请重输:" ;
cin >> dScale;
}
for(int i = 0; i < nTotalLen; i++)//添加形变
{
nTotalArray[i] = nTotalArray[i] * dScale ;
}
char chQuestion2;
cout << "需要输出形变后波形吗(y/n):" ;
cin >> chQuestion2;
if(chQuestion2 == 'y')
{
for(int i = 0; i < nTotalLen; i++)
{
cout << setfill(' ') << setw(4) << i;
for(int nCubeLen = 0; nCubeLen < nTotalArray[i]; nCubeLen++)
{
cout << "■";
}
for(int nBlankLen = 0; nBlankLen < 40 - nTotalArray[i]; nBlankLen++)
{
cout << " ";
}
for(int nCubeLen = 0; nCubeLen < nTotalArray_bkp[i]; nCubeLen++)
{
cout << "■";
}
cout << endl;
}
}
cout << endl << endl << endl;
//3.添加增幅
cout << endl << endl << endl;
///四、将总波形遍历,不断取出与截取波形等长的部分与截取波形作欧氏距离计算
int *nPickArray = new int[nCutLen];
typedef std::pair<int, double> paPoint;
typedef std::vector<paPoint> veDis;
veDis vedDistance;
for(int i = 0; i < nTotalLen - nCutLen; i++)
{
for(int j = 0; j < nCutLen; j++)
{
nPickArray[j] = nTotalArray[i + j];
}
Distance distance(nCutLen, nCutArray, nPickArray);
paPoint point;
point.first = i;
point.second = distance.get();
vedDistance.push_back(point);
}
int nMinPos = 0;
double dMinDis = vedDistance.at(0).second;
for(int i = 0; i < vedDistance.size(); i++)
{
if(vedDistance.at(i).second < dMinDis)
{
dMinDis = vedDistance.at(i).second;
nMinPos = i;
}
}
cout << "最小的欧几里得距离所在起始点为:" << nMinPos << endl ;
cout << "该点的欧几里得距离为:" << dMinDis << endl ;
cout << "截取波形与总波形中寻找到的波形对比如下(左侧为截取波形):" << endl ;
for(int i = 0; i < nCutLen; i++)
{
cout << setfill(' ') << setw(4) << i;
for(int nCubeLen = 0; nCubeLen < nCutArray[i]; nCubeLen++)
{
cout << "■";
}
for(int nBlankLen = 0; nBlankLen < 40 - nCutArray[i]; nBlankLen++)
{
cout << " ";
}
for(int nCubeLen = 0; nCubeLen < nTotalArray[i + nMinPos]; nCubeLen++)
{
cout << "■";
}
cout << endl;
}
cout << endl << endl << endl;
cout << "需要输出其它点的欧氏距离吗(y/n):" ;
cin >> chQuestion2;
if(chQuestion2 == 'y')
{
for(int i = 0; i < vedDistance.size(); i++)
{
cout << i << "处的欧几里得距离为:" << vedDistance.at(i).second << endl;
}
}
cout << endl << endl << endl;
bool bEnd = false;
while(!bEnd)
{
cout << "需要看其它点波形吗(y/n):" ;
cin >> chQuestion2;
if(chQuestion2 == 'y')
{
int nViewPos;
cout << "输入查看点:" ;
cin >> nViewPos;
for(int i = 0; i < nCutLen; i++)
{
cout << setfill(' ') << setw(4) << i;
for(int nCubeLen = 0; nCubeLen < nCutArray[i]; nCubeLen++)
{
cout << "■";
}
for(int nBlankLen = 0; nBlankLen < 40 - nCutArray[i]; nBlankLen++)
{
cout << " ";
}
for(int nCubeLen = 0; nCubeLen < nTotalArray[i + nViewPos]; nCubeLen++)
{
cout << "■";
}
cout << endl;
}
cout << endl << endl << endl;
}
else
{
bEnd = true;
}
}
delete[] nTotalArray;
delete[] nCutArray;
delete[] nPickArray;
return 0;
}
3、看结果
(1)没干扰下
随机生成波形
截取其中一段
毛刺为0,形变为1
结果如下:
这种当然是小case,接下来就上干扰啦。
(2)添加干扰
随机生成波形
截取波形
添加毛刺数量为50,毛刺强度为15(因为总点数为100,每点在0-32之间随机,因此这个干扰已经很大了)
左侧为变形后波形,右侧为原始波形,失真比较严重了
添加形变为1.5,即所有值放大为原来1.5倍
结果:找到波形起始点还是30处。。厉害了我的欧几里得。。
因为30处的欧氏距离依然是最小的,当然,干扰再大一点就失效了,特别是比例干扰,比毛刺干扰更严重
看一下另外一个欧氏距离也比较小的点的波形对比,好像是有点像,毕竟形变太严重了。