本文介绍在C++中使用OpenCL的基本步骤,系统是Windows 10,IDE是Visual Studio 2017。
step1:先安装Windows版本的OpenCL SDK,然后新建一个工程,在属性管理器里面给这个工程添加OpenCL的头文件目录和库目录,并链接OpenCL.lib。
step2:导入头文件
#include <math.h>
#include <time.h>

#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <CL/cl.h>
#include <opencv2/opencv.hpp> // The kernel in this article operates on an image, so the OpenCV headers are needed
#include <opencv2/highgui.hpp>
using namespace cv;
using namespace std;
#pragma warning( disable : 4996 )
step3:获取平台和上下文
/**
 * Creates an OpenCL context on the first platform reported by the runtime.
 *
 * A GPU context is requested first. If that fails, a CPU context on the same
 * platform is tried before giving up, so the function also works on machines
 * without a usable GPU device.
 *
 * @return The created context, or NULL when no platform is found or neither
 *         a GPU nor a CPU context can be created. A diagnostic is written to
 *         std::cerr on failure.
 */
cl_context CreateContext()
{
    cl_int errNum = CL_SUCCESS;
    cl_uint numPlatforms = 0;
    cl_platform_id firstPlatformId = nullptr;

    // Pick the first of the available platforms.
    errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
    if (errNum != CL_SUCCESS || numPlatforms == 0)
    {
        std::cerr << "Failed to find any OpenCL platforms." << std::endl;
        return nullptr;
    }

    // Zero-terminated property list that binds the context to that platform.
    cl_context_properties contextProperties[] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)firstPlatformId,
        0
    };

    // Prefer a GPU context.
    cl_context context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
                                                 nullptr, nullptr, &errNum);
    if (errNum != CL_SUCCESS || context == nullptr)
    {
        std::cerr << "Could not create GPU context, trying CPU..." << std::endl;
        context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU,
                                          nullptr, nullptr, &errNum);
        if (errNum != CL_SUCCESS || context == nullptr)
        {
            std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
            return nullptr;
        }
    }

    return context;
}
step4:创建设备并创建命令队列
/**
 * Creates a command queue on the first device of the given context.
 *
 * @param context OpenCL context whose device list is queried.
 * @param device  Output: receives the id of the device the queue was created
 *                on. May be NULL if the caller does not need the id.
 * @return The new command queue, or NULL if the device list cannot be
 *         queried, is empty, or the queue cannot be created. A diagnostic is
 *         written to std::cerr on failure.
 */
cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)
{
    // First call: ask only for the size in bytes of the context's device list.
    size_t deviceBufferSize = 0;
    cl_int errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr, &deviceBufferSize);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to query the size of the context's device list." << std::endl;
        return nullptr;
    }
    if (deviceBufferSize < sizeof(cl_device_id))
    {
        std::cerr << "No devices available." << std::endl;
        return nullptr;
    }

    // Second call: fetch the device ids. The vector releases its storage on
    // every return path, so the list is no longer leaked.
    std::vector<cl_device_id> devices(deviceBufferSize / sizeof(cl_device_id));
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceBufferSize, devices.data(), nullptr);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to get the context's device IDs." << std::endl;
        return nullptr;
    }

    // Uncomment to print the name of the selected device:
    //char name_data[100];
    //clGetDeviceInfo(devices[0], CL_DEVICE_NAME,sizeof(name_data), name_data, NULL);
    //printf("device:%s\n", name_data);

    // Use the first of the available devices.
    cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, &errNum);
    if (errNum != CL_SUCCESS || commandQueue == nullptr)
    {
        std::cerr << "Failed to create commandQueue for device 0." << std::endl;
        return nullptr;
    }

    if (device != nullptr)
    {
        *device = devices[0];
    }
    return commandQueue;
}
step5:构建并且编译program
/**
 * Reads OpenCL C source from a file, creates a program object from it and
 * builds it.
 *
 * @param context  Context the program is created in.
 * @param device   Device whose build log is printed when the build fails.
 * @param fileName Path of the kernel source file.
 * @return The built program, or NULL if the file cannot be opened, the
 *         program object cannot be created, or the build fails. On a build
 *         failure the build log is written to std::cerr and the program
 *         object is released.
 */
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
    if (fileName == nullptr)
    {
        std::cerr << "Failed to open file for reading: (null)" << std::endl;
        return nullptr;
    }

    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
        return nullptr;
    }

    // Read the whole file into one string.
    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    const std::string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();

    cl_int errNum = CL_SUCCESS;
    cl_program program = clCreateProgramWithSource(context, 1, &srcStr, nullptr, &errNum);
    if (errNum != CL_SUCCESS || program == nullptr)
    {
        std::cerr << "Failed to create CL program from source." << std::endl;
        return nullptr;
    }

    // Passing no device list builds for every device associated with the program.
    errNum = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
    if (errNum != CL_SUCCESS)
    {
        // Query the log size first, then fetch the log itself.
        size_t logSize = 0;
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize);
        std::string buildLog(logSize, '\0');
        if (logSize > 0)
        {
            clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                  logSize, &buildLog[0], nullptr);
        }
        std::cerr << "Error in kernel: " << std::endl;
        std::cerr << buildLog << std::endl;
        clReleaseProgram(program);
        return nullptr;
    }

    return program;
}
step6:依次创建上下文、命令队列和program,最后创建kernel
// 一、选择OpenCL平台并创建一个上下文
cl_context context = CreateContext();
// 二、 创建设备并创建命令队列
cl_command_queue commandQueue = CreateCommandQueue(context, &device);
cl_event histEvent = 0;
//创建和构建程序对象
cl_program program = CreateProgram(context, device, "main.cl");
cl_kernel kernel = clCreateKernel(program, "kernel_rgb2gray", NULL);
step7:新建文件main.cl,在里面新建一个核函数
// Copies one 8-bit input element per work-item into the int output buffer.
//
// rgbImage: input buffer, read as one unsigned char per work-item.
// result:   output buffer, written as one int per work-item.
//
// The flat index is row-major: row * row_length + column. The row length is
// taken from get_global_size(0), so no extra kernel argument is needed.
// NOTE(review): assumes a 2-D launch where dimension 0 spans the image width
// and dimension 1 the image height -- confirm against the host code.
__kernel void kernel_rgb2gray(__global unsigned char * rgbImage,
                              __global int* result)
{
    const int x = get_global_id(0);       // index along dimension 0
    const int y = get_global_id(1);       // index along dimension 1
    const int width = get_global_size(0); // number of work-items per row
    const int index = y * width + x;
    result[index] = rgbImage[index];
}
step8:读取图像、创建缓冲区,并为核函数设置参数
Mat image = imread("D://b.jpg");
int imgSize = image.rows * image.cols;
Mat srcImage;
cvtColor(image, srcImage, CV_BGR2GRAY);
cl_mem memResult = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(int)*imgSize, NULL, NULL);
cl_mem memRgbImage = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
sizeof(uchar) * 3 * imgSize, srcImage.data, NULL);
errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memRgbImage);
errNum = clSetKernelArg(kernel, 1, sizeof(cl_mem), &memResult);
step9:执行核函数
int img_h = img.rows;
int img_w = img.cols;
size_t globalThreads[2] = { img_w, img_h };
//执行内核函数
errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 2, 0, globalThreads, NULL,
0, NULL, &histEvent);
step10:获取执行结果
int result* = new int[imgSize];
errNum = clEnqueueReadBuffer(commandQueue, memResult, CL_TRUE, 0,
sizeof(float)*img_h*img_w, result, 1, &histEvent, NULL);