基础对象对应关系
| OpenCL C++ 对象 | OpenCL C 对应类型 | 创建函数示例 |
|---|---|---|
cl::Platform | cl_platform_id | clGetPlatformIDs(1, &platform, NULL) |
cl::Device | cl_device_id | clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL) |
cl::Context | cl_context | clCreateContext(NULL, 1, &device, NULL, NULL, &context) |
cl::CommandQueue | cl_command_queue | clCreateCommandQueue(context, device, 0, &queue) |
cl::Program | cl_program | clCreateProgramWithSource(context, 1, &source, NULL, &program) |
cl::Kernel | cl_kernel | clCreateKernel(program, "kernel_name", &kernel) |
cl::Buffer | cl_mem | clCreateBuffer(context, flags, size, host_ptr, &err) |
cl::Image1D/2D/3D | cl_mem | clCreateImage2D(context, flags, &format, width, height, row_pitch, host_ptr, &err) |
主要API函数对比
1. 平台与设备
```cpp
// C++
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
cl::Device device = cl::Device::getDefault();

// C
cl_platform_id platform;
cl_device_id device;
clGetPlatformIDs(1, &platform, NULL);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL);
```
2. 上下文创建
```cpp
// C++
cl::Context context(CL_DEVICE_TYPE_GPU);

// C
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
```
3. 命令队列
```cpp
// C++
cl::CommandQueue queue(context, device);

// C
cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err);
```
4. 程序与内核
```cpp
// C++
cl::Program program(context, sources);
program.build("-cl-std=CL1.2");
cl::Kernel kernel(program, "vecAdd");

// C
cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, &err);
clBuildProgram(program, 1, &device, "-cl-std=CL1.2", NULL, NULL);
cl_kernel kernel = clCreateKernel(program, "vecAdd", &err);
```
5. 缓冲区操作
```cpp
// C++
cl::Buffer buffer(context, CL_MEM_READ_WRITE, size);
queue.enqueueWriteBuffer(buffer, CL_TRUE, 0, size, data);

// C
cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, size, NULL, &err);
clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, size, data, 0, NULL, NULL);
```
6. 内核执行
```cpp
// C++
kernel.setArg(0, buffer);
queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(1024), cl::NDRange(128));

// C
clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
size_t global = 1024, local = 128;
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
```
关键差异说明
1. **错误处理**：
   - C++：使用异常机制
   - C：通过返回错误码，需要手动检查
2. **对象管理**：
   - C++：RAII自动管理资源
   - C：需要手动释放资源（`clRelease*` 函数）
3. **参数传递**：
   - C++：类型安全的封装
   - C：需要手动处理指针和大小
4. **辅助功能**：
   - C++：提供STL风格的便捷函数（如 `cl::copy`）
   - C：需要手动实现类似功能
完整C示例代码
c
#include <CL/cl.h>
#include <stdio.h>

/* Abort with a diagnostic when an OpenCL call fails. The C API reports
 * errors through return codes / out-parameters, so every call must be
 * checked manually (this is exactly the contrast with the C++ bindings
 * this example is meant to illustrate). */
#define CL_CHECK(err, what)                                              \
    do {                                                                 \
        if ((err) != CL_SUCCESS) {                                       \
            fprintf(stderr, "%s failed (%d)\n", (what), (int)(err));     \
            return 1;                                                    \
        }                                                                \
    } while (0)

int main(void) {
    cl_int err;

    /* 1. Get platform and device. */
    cl_platform_id platform;
    cl_device_id device;
    err = clGetPlatformIDs(1, &platform, NULL);
    CL_CHECK(err, "clGetPlatformIDs");
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    CL_CHECK(err, "clGetDeviceIDs");

    /* 2. Create context and command queue. */
    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    CL_CHECK(err, "clCreateContext");
    cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err);
    CL_CHECK(err, "clCreateCommandQueue");

    /* 3. Create program and kernel. */
    const char* source =
        "__kernel void vecAdd(__global float* a) { a[get_global_id(0)] += 1; }";
    cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, &err);
    CL_CHECK(err, "clCreateProgramWithSource");
    err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    if (err != CL_SUCCESS) {
        /* Dump the build log so kernel compile errors are actually visible. */
        char log[4096] = {0};
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                              sizeof(log) - 1, log, NULL);
        fprintf(stderr, "Build error:\n%s\n", log);
        return 1;
    }
    cl_kernel kernel = clCreateKernel(program, "vecAdd", &err);
    CL_CHECK(err, "clCreateKernel");

    /* 4. Create a buffer and copy host data to the device. */
    float data[1024] = {0};
    cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(data), NULL, &err);
    CL_CHECK(err, "clCreateBuffer");
    err = clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, sizeof(data), data,
                               0, NULL, NULL);
    CL_CHECK(err, "clEnqueueWriteBuffer");

    /* 5. Set the kernel argument and launch one work-item per element. */
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
    CL_CHECK(err, "clSetKernelArg");
    size_t global = 1024;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL,
                                 0, NULL, NULL);
    CL_CHECK(err, "clEnqueueNDRangeKernel");

    /* 6. Read the result back (blocking read, so no extra clFinish needed). */
    err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(data), data,
                              0, NULL, NULL);
    CL_CHECK(err, "clEnqueueReadBuffer");

    /* 7. Release everything — the C API has no RAII. */
    clReleaseMemObject(buffer);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
    return 0;
}
完整C++示例代码
// The legacy C++ bindings in cl.hpp only define cl::Error and throw it on
// failure when exceptions are enabled BEFORE the header is included;
// without this define, the catch (const cl::Error&) blocks below would not
// even compile.
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
#include <cstdlib>   // EXIT_SUCCESS / EXIT_FAILURE
#include <iostream>
#include <string>    // std::string for the build log
#include <vector>

int main() {
    try {
        // 1. Get platform and device.
        std::vector<cl::Platform> platforms;
        cl::Platform::get(&platforms);
        if (platforms.empty()) {
            std::cerr << "No OpenCL platforms found!" << std::endl;
            return EXIT_FAILURE;
        }
        cl::Platform platform = platforms[0];
        std::cout << "Using platform: " << platform.getInfo<CL_PLATFORM_NAME>() << std::endl;

        std::vector<cl::Device> devices;
        platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
        if (devices.empty()) {
            std::cerr << "No GPU devices found!" << std::endl;
            return EXIT_FAILURE;
        }
        cl::Device device = devices[0];
        std::cout << "Using device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;

        // 2. Create context and command queue (RAII: released automatically).
        cl::Context context(device);
        cl::CommandQueue queue(context, device);

        // 3. Create program and kernel.
        const std::string kernelSource = R"(
            __kernel void vecAdd(__global float* a) {
                int i = get_global_id(0);
                a[i] += 1.0f;
            }
        )";
        cl::Program::Sources sources;
        sources.push_back({kernelSource.c_str(), kernelSource.length()});
        cl::Program program(context, sources);
        try {
            program.build();
        } catch (const cl::Error& e) {
            // Surface the compiler output before propagating the failure.
            std::string buildLog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
            std::cerr << "Build error:\n" << buildLog << std::endl;
            throw;
        }
        cl::Kernel kernel(program, "vecAdd");

        // 4. Create a buffer initialized from host memory.
        const size_t dataSize = 1024;
        std::vector<float> data(dataSize, 0.0f);
        cl::Buffer buffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                          sizeof(float) * dataSize, data.data());

        // 5. Set the kernel argument and launch one work-item per element.
        kernel.setArg(0, buffer);
        queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(dataSize), cl::NullRange);

        // 6. Read the result back (blocking read).
        queue.enqueueReadBuffer(buffer, CL_TRUE, 0, sizeof(float) * dataSize, data.data());

        // Verify: every element started at 0 and the kernel added 1.
        bool success = true;
        for (size_t i = 0; i < dataSize; ++i) {
            if (data[i] != 1.0f) {
                success = false;
                break;
            }
        }
        std::cout << "Computation " << (success ? "succeeded" : "failed") << std::endl;
    } catch (const cl::Error& e) {
        std::cerr << "OpenCL error: " << e.what() << " (" << e.err() << ")" << std::endl;
        return EXIT_FAILURE;
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}