近日有需求要写一个矩阵运算的算法,Matlab偶是不会的,于是像找一个C++的矩阵库。
偶然看到这一篇,我自己也实验呢一把。
代码还是不变:
// MatixTest.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
//#pragma warning(disable:4344)
//#define DEIGEN_NO_DEBUG
//#define DNDEBUG
#include <emmintrin.h>
#include <armadillo>
#include <Eigen/Dense>
#include <opencv.hpp>
#include <vector>
#include <iostream>
#include <Timer.h>
using namespace std;
#define TEST1 1
int main()
{
Timer timer; // timer
double elapsedTime; // time in millisecond
double res; // SAD value
int i; // loop variable
float bnd = 1e5; // loop times
// Armadillo
arma::mat armaA(4, 1);
arma::mat armaB(4, 1);
timer.start();
for (i = 0; i < bnd; ++i)
{
#if TEST1
res = arma::accu(arma::abs(armaA - armaB));
#else
res = 0;
for (int idx = 0; idx < 4; ++idx)
{
res += abs(armaA(idx, 0) - armaB(idx, 0));
}
#endif
}
elapsedTime = timer.getElapsedTimeInMilliSec();
cout << "arma time : " << elapsedTime << " ms" << endl;
// Eigen
Eigen::Vector4d eiA;
Eigen::Vector4d eiB;
Eigen::Vector4d eiC;
timer.start();
for (i = 0; i < bnd; ++i)
{
#if TEST1
res = (eiA - eiB).cwiseAbs().sum();
#else
res = 0;
for (int idx = 0; idx < 4; ++idx)
{
res += abs(eiA(idx,0) - eiB(idx, 0));
}
#endif
}
elapsedTime = timer.getElapsedTimeInMilliSec();
cout << "eigen time : " << elapsedTime << " ms" << endl;
// OpenCV
cv::Mat ocvA(4, 1, CV_64F);
cv::Mat ocvB(4, 1, CV_64F);
timer.start();
for (i = 0; i < bnd; ++i)
{
#if TEST1
res = cv::sum(cv::abs(ocvA - ocvB))[0];
#else
res = 0;
for (int idx = 0; idx < 4; ++idx)
{
res += abs(ocvA.at<double>(idx, 0) - ocvB.at<double>(idx, 0));
}
#endif
}
elapsedTime = timer.getElapsedTimeInMilliSec();
cout << "opencv time : " << elapsedTime << " ms" << endl;
// pointer operation
double *a = (double*)_mm_malloc(4 * sizeof(double), 16);
double *b = (double*)_mm_malloc(4 * sizeof(double), 16);
int len = ocvA.rows;
printf("len = %d\n", len);
timer.start();
for (i = 0; i < bnd; ++i)
{
res = 0;
for (int idx = 0; idx < len; ++idx)
{
res += abs(a[idx] - b[idx]);
}
//cout<<"i = "<<i<<endl;
}
elapsedTime = timer.getElapsedTimeInMilliSec();
cout << "array operation : " << elapsedTime << " ms" << endl;
// release resource
_mm_free(a);
_mm_free(b);
return 0;
}
只不过Armadillo和Eigen都是最新的,编译器是VS2015.3,Win10 1083。
Debug下的结果如下:
***** VIDEOINPUT LIBRARY - 0.1995 - TFW07 *****
arma time : 102.004 ms
eigen time : 195.304 ms
opencv time : 718.789 ms
len = 4
array operation : 8.15559 ms
请按任意键继续. . .
WTF,eigen怎么成这样,说好的和array operation差不多的呢?
再看Release:
arma time : 1.21401 ms
eigen time : 0 ms
opencv time : 255.893 ms
len = 4
array operation : 0 ms
请按任意键继续. . .
这TM的是根本不耗时的意思吗?
最终,我只得祭出大杀器:tbb来了
// MatixTest.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
//#pragma warning(disable:4344)
//#define DEIGEN_NO_DEBUG
//#define DNDEBUG
#include <emmintrin.h>
#include <armadillo>
#include <Eigen/Dense>
#include <opencv.hpp>
#include <vector>
#include <iostream>
#include <Timer.h>
#include <tbb/tick_count.h>
using namespace std;
#define TEST1 1
int main()
{
//Timer timer; // timer
//double elapsedTime; // time in millisecond
double res; // SAD value
int i; // loop variable
float bnd = 1e5; // loop times
// Armadillo
arma::mat armaA(4, 1);
arma::mat armaB(4, 1);
//timer.start();
tbb::tick_count start = tbb::tick_count::now();
for (i = 0; i < bnd; ++i)
{
#if TEST1
res = arma::accu(arma::abs(armaA - armaB));
#else
res = 0;
for (int idx = 0; idx < 4; ++idx)
{
res += abs(armaA(idx, 0) - armaB(idx, 0));
}
#endif
}
//elapsedTime = timer.getElapsedTimeInMilliSec();
//cout << "arma time : " << elapsedTime << " ms" << endl;
tbb::tick_count stop = tbb::tick_count::now();
cout << "arma time : " << (stop - start).seconds() << " sec" << endl;
// Eigen
Eigen::Vector4d eiA;
Eigen::Vector4d eiB;
Eigen::Vector4d eiC;
start = tbb::tick_count::now();
for (i = 0; i < bnd; ++i)
{
#if TEST1
res = (eiA - eiB).cwiseAbs().sum();
#else
res = 0;
for (int idx = 0; idx < 4; ++idx)
{
res += abs(eiA(idx,0) - eiB(idx, 0));
}
#endif
}
stop = tbb::tick_count::now();
cout << "eigen time : " << (stop - start).seconds() << " sec" << endl;
// OpenCV
cv::Mat ocvA(4, 1, CV_64F);
cv::Mat ocvB(4, 1, CV_64F);
start = tbb::tick_count::now();
for (i = 0; i < bnd; ++i)
{
#if TEST1
res = cv::sum(cv::abs(ocvA - ocvB))[0];
#else
res = 0;
for (int idx = 0; idx < 4; ++idx)
{
res += abs(ocvA.at<double>(idx, 0) - ocvB.at<double>(idx, 0));
}
#endif
}
stop = tbb::tick_count::now();
cout << "opencv time : " << (stop - start).seconds() << " sec" << endl;
// pointer operation
double *a = (double*)_mm_malloc(4 * sizeof(double), 16);
double *b = (double*)_mm_malloc(4 * sizeof(double), 16);
int len = ocvA.rows;
printf("len = %d\n", len);
start = tbb::tick_count::now();
for (i = 0; i < bnd; ++i)
{
res = 0;
for (int idx = 0; idx < len; ++idx)
{
res += abs(a[idx] - b[idx]);
}
//cout<<"i = "<<i<<endl;
}
stop = tbb::tick_count::now();
cout << "array operation : " << (stop - start).seconds() << " sec" << endl;
// release resource
_mm_free(a);
_mm_free(b);
return 0;
}
Debug的结果:
***** VIDEOINPUT LIBRARY - 0.1995 - TFW07 *****
arma time : 0.0932793 sec
eigen time : 0.190308 sec
opencv time : 0.683494 sec
len = 4
array operation : 0.00781852 sec
请按任意键继续. . .
Release的结果:
arma time : 0.001249 sec
eigen time : 0 sec
opencv time : 0.230224 sec
len = 4
array operation : 0 sec
请按任意键继续. . .
这到底是怎么回事呢?真的是运行太快没间隔吗?还是我哪里写错了呢?