OpenCL 执行流程
![在这里插入图片描述](https://i-blog.csdnimg.cn/blog_migrate/2242cd36212af00760777ec0e9edef3e.png)
简单实例
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include<CL/cl2.hpp>
#include <string.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
int main()
{
cl::Platform platforms = cl::Platform::getDefault();
cl::Context context = cl::Context(CL_DEVICE_TYPE_GPU, NULL);
cl_int errNum = CL_SUCCESS;
std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
cl::CommandQueue cq = cl::CommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE, &errNum);
if (errNum != CL_SUCCESS) {
std::cout << "error code " << errNum << std::endl;
return -1;
}
std::ifstream kernelFile("main.cl", std::ios::in);
std::ostringstream oss;
oss << kernelFile.rdbuf();
std::string srcStdStr = oss.str();
const char* srcStr = srcStdStr.c_str();
auto program = cl::Program(context, srcStr, false, &errNum);
errNum = program.build(devices);
auto kernel = cl::Kernel(program, "kernel_func", &errNum);
if (errNum != CL_SUCCESS)
{
std::cout << "error in create kernel" << std::endl;
}
const int global_size = 2048;
int a[global_size];
int b[global_size];
for (int i = 0; i < global_size; ++i)
{
a[i] = i + 1;
b[i] = i + 1;
}
cl::Buffer mem_input_a(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int) * global_size, a, &errNum);
cl::Buffer mem_input_b(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int) * global_size, b, &errNum);
cl::Buffer mem_result = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * global_size, NULL, NULL);
errNum = kernel.setArg(0, sizeof(cl_mem), &mem_input_a);
errNum = kernel.setArg(1, sizeof(cl_mem), &mem_input_b);
errNum = kernel.setArg(2, sizeof(cl_mem), &mem_result);
errNum = cq.enqueueNDRangeKernel(kernel, cl::NDRange(0, 0), cl::NDRange(1024, 2), cl::NullRange, NULL, NULL);
int* result = new int[global_size];
errNum = cq.enqueueReadBuffer(mem_result, true, 0, sizeof(cl_int) * global_size, result, NULL, NULL);
for (int i = 0; i < global_size; ++i)
std::cout << result[i] << std::endl;
delete[]result;
system("pause");
return 0;
}
// Kernel source loaded by the host program from "main.cl".
//
// Element-wise addition: result[i] = a[i] + b[i], with one work-item per
// element of a 2-D NDRange.
//   a, b    read-only input arrays
//   result  output array
// All three arrays must hold at least get_global_size(0) * get_global_size(1)
// elements; no bounds check is performed.
__kernel void kernel_func(__global const int* a, __global const int* b, __global int* result)
{
    // Flatten the 2-D global id row-major. size_t avoids narrowing the values
    // returned by get_global_id / get_global_size.
    const size_t index = get_global_id(0) + get_global_size(0) * get_global_id(1);
    result[index] = a[index] + b[index];
}