使用的硬件:我在英伟达独立显卡上运行OpenCL代码,且在此之前已经配置过CUDA编程环境,因此,环境配置的过程会比较简单。
依赖的头文件和库:配置好CUDA环境后,在CUDA的相关资源中,通常会包含OpenCL所需的头文件和库信息。例如,头文件cl.h通常位于CUDA安装目录下的include\CL子目录中(即%CUDA_PATH%\include\CL\cl.h)。
更进一步地,库文件OpenCL.lib通常位于CUDA安装目录下的lib\x64子目录中(即%CUDA_PATH%\lib\x64\OpenCL.lib)。
使用OpenCL时,只需要在VC++目录
>> 常规
中的包含目录和库目录先添加相关路径,并在链接器
>> 输入
>>附加依赖项
中添加OpenCL.lib
即可
第一个OpenCL程序:做好以上准备后,输入以下代码,编译并运行:
#include <CL/cl.h>

#include <cstdlib>
#include <iostream>
#include <vector>
// Matrix dimension: the sample works on N x N single-precision matrices.
constexpr int N = 1024;
// Size in bytes of one N x N float matrix (used for buffer allocation and transfers).
constexpr size_t size = sizeof(float) * N * N;
int main() {
// 初始化输入矩阵
float* A = new float[N * N];
float* B = new float[N * N];
for (size_t i = 0; i < N * N; i++)
{
A[i] = 1.0f;
B[i] = 2.0f;
}
cl_platform_id platform;
clGetPlatformIDs(1, &platform, NULL);
cl_device_id device;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
cl_command_queue queue = clCreateCommandQueueWithProperties(context, device, 0, NULL);
// 创建OpenCL内存缓冲区
cl_mem bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, NULL);
cl_mem bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, NULL);
cl_mem bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, size, NULL, NULL);
// 将输入数据传输到OpenCL缓冲区
clEnqueueWriteBuffer(queue, bufferA, CL_TRUE, 0, size, A, 0, NULL, NULL);
clEnqueueWriteBuffer(queue, bufferB, CL_TRUE, 0, size, B, 0, NULL, NULL);
const char* source = "__kernel void add_matrices(__global const float* A, __global const float* B, __global float* C) { int id = get_global_id(0); C[id] = A[id] + B[id]; }";
cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, NULL);
clBuildProgram(program, 1, &device, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, "add_matrices", NULL);
// 设置OpenCL内核参数
clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC);
// 启动内核
size_t globalWorkSize[2] = { N, N };
clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
// 读取结果数据
clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, size, A, 0, NULL, NULL);
// 清理OpenCL资源
clReleaseMemObject(bufferA);
clReleaseMemObject(bufferB);
clReleaseMemObject(bufferC);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);
// 打印结果
std::cout << "Result: " << A[1023] << std::endl;
delete[] A;
delete[] B;
return 0;
}
正常输出结果为:
Result: 3
这样,基本的运行环境就搭建完成了。下一章节将介绍OpenCL的工作流程和相关注意事项。