I had been hearing about OpenCL for a long time without ever getting a chance to learn it; now that chance has finally arrived.
For performance reasons, all of our existing algorithms run on the GPU. Now a multiband algorithm needs to be added, and that is where the trouble starts: the multiband code I put together is written entirely in a CPU style, and since my grasp of GPU computing is thin, cramming at the last minute is the only option.
The natural place to start is with whatever is most familiar, and that is OpenCV. Even better, it turns out OpenCV already ships with OpenCL support built in.
To keep the test simple, the goal is just to load an image, push it onto the GPU, and run some trivial computation on it.
After searching around, I found a short piece of code that works right out of the box: https://gist.github.com/atinfinity/8c25c8fb1b3708aa0944
My development environment is VS2013 + OpenCV 2.4.10.
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/ocl/ocl.hpp>
// Included so that we can refer to the cl_mem type directly
#if defined __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include <iostream>
int main(int argc, const char** argv)
{
    cv::ocl::DevicesInfo devInfo;
    int res = cv::ocl::getOpenCLDevices(devInfo);
    if (res == 0)
    {
        std::cerr << "There is no OPENCL Here !" << std::endl;
        return -1;
    }
    else
    {
        for (unsigned int i = 0; i < devInfo.size(); ++i)
        {
            std::cout << "Device : " << devInfo[i]->deviceName << " is present" << std::endl;
        }
    }
    cv::ocl::setDevice(devInfo[0]); // select device to use
    std::cout << CV_VERSION_EPOCH << "." << CV_VERSION_MAJOR << "." << CV_VERSION_MINOR << std::endl;
    // The OpenCL kernel source: each work-item computes the negative of one byte
    const char *KernelSource = "\n" \
        "__kernel void negaposi_C1_D0( \n" \
        "    __global uchar* input, \n" \
        "    __global uchar* output) \n" \
        "{ \n" \
        "    int i = get_global_id(0); \n" \
        "    output[i] = 255 - input[i]; \n" \
        "}\n";
    //cv::Mat mat_src = cv::imread("D:/Practice/OpencvTest/Opencv2Test/PyramidPractice/lena.bmp", cv::IMREAD_GRAYSCALE);
    cv::Mat mat_src = cv::imread("img/cltest1.jpg", cv::IMREAD_GRAYSCALE);
    cv::Mat mat_dst;
    if (mat_src.empty())
    {
        std::cerr << "Failed to open image file." << std::endl;
        return -1;
    }
    unsigned int channels = mat_src.channels();
    unsigned int depth = mat_src.depth();
    int64 tt = cv::getTickCount();
    // Upload the source image to the GPU and allocate the destination buffer
    cv::ocl::oclMat ocl_src(mat_src);
    cv::ocl::oclMat ocl_dst(mat_src.size(), mat_src.type());
    cv::ocl::ProgramSource program("negaposi", KernelSource);
    // One work-item per byte; ocl_src.step already accounts for any row padding
    std::size_t globalThreads[3] = { ocl_src.rows * ocl_src.step, 1, 1 };
    std::vector<std::pair<size_t, const void *> > args;
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&ocl_src.data));
    args.push_back(std::make_pair(sizeof(cl_mem), (void *)&ocl_dst.data));
    // Build and run the kernel; channels/depth complete the kernel name
    cv::ocl::openCLExecuteKernelInterop(cv::ocl::Context::getContext(),
        program, "negaposi", globalThreads, NULL, args, channels, depth, NULL);
    // Copy the result back to host memory
    ocl_dst.download(mat_dst);
    std::cout << "ocl time: " << (cv::getTickCount() - tt) / cv::getTickFrequency() * 1000 << " ms" << std::endl;
cv::namedWindow("mat_src");
cv::namedWindow("mat_dst");
cv::imshow("mat_src", mat_src);
cv::imshow("mat_dst", mat_dst);
cv::waitKey(0);
cv::destroyAllWindows();
return 0;
}
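Since the whole reason for doing this is performance, it also helps to time the same inversion on the CPU for comparison. A minimal sketch of my own (not part of the gist), using an ordinary OpenCV matrix expression:
int64 t_cpu = cv::getTickCount();
cv::Mat cpu_dst = 255 - mat_src; // per-pixel negative computed on the host
std::cout << "cpu time: " << (cv::getTickCount() - t_cpu) / cv::getTickFrequency() * 1000 << " ms" << std::endl;
Keep in mind that the OpenCL timing above includes uploading the image, compiling the kernel and downloading the result, so for a single small image the CPU version may well come out ahead.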
The most useful part above is the string assigned to KernelSource: it is the OpenCL kernel itself, and it alone decides how each pixel is computed on the GPU; the rest of the code is just plumbing for uploading the data, launching the kernel and downloading the result.
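One detail worth noting: the kernel in the source is called negaposi_C1_D0, yet the host code passes only "negaposi"; as far as I can tell, openCLExecuteKernelInterop appends the _C1_D0 suffix from the channels/depth arguments (1 channel, depth CV_8U). To try a different per-pixel operation, only the kernel body has to change. For example, a simple fixed-threshold kernel could look like the following sketch of mine (the name thresh_C1_D0 and the 128 cutoff are my own choices, not from the original code):
const char *ThresholdSource = "\n" \
    "__kernel void thresh_C1_D0( \n" \
    "    __global uchar* input, \n" \
    "    __global uchar* output) \n" \
    "{ \n" \
    "    int i = get_global_id(0); \n" \
    "    output[i] = (input[i] > 128) ? 255 : 0; \n" \
    "}\n";
Running it should only require swapping KernelSource for ThresholdSource in the ProgramSource constructor and passing "thresh" as the kernel name to openCLExecuteKernelInterop; everything else stays the same.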