OpenCL的整体工作流程

这篇文章的重点是啥:

关于OpenCL整体工作流程的文章有很多,比较好的有知乎华叔的文章《OpenCL 2.0 异构计算》。流程他们已经讲得很详细了,我就不再简单重复;这篇博客主要是给出一份可运行的代码,然后强调一些我觉得重要的东西。

OpenCL的工作流程

创建并执行一个简单的OpenCL应用大致需要以下几步:

  • 查询平台和设备信息
  • 创建一个上下文
  • 为每个设备创建一个命令队列
  • 创建一个内存对象(数组)用于存储数据
  • 拷贝输入数据到设备端
  • 使用OpenCL C代码创建并编译出一个程序
  • 从编译好的OpenCL程序中提取内核
  • 执行内核
  • 拷贝输出数据到主机端
  • 释放资源
    在下面的代码中,很容易找到对应的代码。

简单的可运行代码

这里复用了前面Windows下环境配置中的代码,运行时输出Result: 3即为正确运行。

#include <CL/cl.h>

#include <cstdlib>
#include <iostream>
#include <vector>

// Matrix dimension: the sample operates on N x N float matrices.
constexpr int N = 1024;
// Size in bytes of one N x N float matrix; used for buffer creation and transfers.
constexpr size_t size = sizeof(float) * N * N;

// Translates an OpenCL status code into the name of the corresponding
// CL_* constant so that failures can be reported in readable form.
//
// @param error  Status code returned by (or through) an OpenCL API call.
// @return       Pointer to a string literal naming the code, or
//               "Unknown OpenCL error" when the code is not in the table.
//               The returned pointer must not be freed.
const char* getErrorString(cl_int error) {
	switch (error) {
		// general run-time and program build errors
	case 0: return "CL_SUCCESS";
	case -1: return "CL_DEVICE_NOT_FOUND";
	case -2: return "CL_DEVICE_NOT_AVAILABLE";
	case -3: return "CL_COMPILER_NOT_AVAILABLE";
	case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
	case -5: return "CL_OUT_OF_RESOURCES";
	case -6: return "CL_OUT_OF_HOST_MEMORY";
	case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
	case -8: return "CL_MEM_COPY_OVERLAP";
	case -9: return "CL_IMAGE_FORMAT_MISMATCH";
	case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
	case -11: return "CL_BUILD_PROGRAM_FAILURE";
	case -12: return "CL_MAP_FAILURE";
	case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
	case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
	case -15: return "CL_COMPILE_PROGRAM_FAILURE";
	case -16: return "CL_LINKER_NOT_AVAILABLE";
	case -17: return "CL_LINK_PROGRAM_FAILURE";
	case -18: return "CL_DEVICE_PARTITION_FAILED";
	case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";

		// invalid-argument errors (CL_INVALID_*) reported by API calls
	case -30: return "CL_INVALID_VALUE";
	case -31: return "CL_INVALID_DEVICE_TYPE";
	case -32: return "CL_INVALID_PLATFORM";
	case -33: return "CL_INVALID_DEVICE";
	case -34: return "CL_INVALID_CONTEXT";
	case -35: return "CL_INVALID_QUEUE_PROPERTIES";
	case -36: return "CL_INVALID_COMMAND_QUEUE";
	case -37: return "CL_INVALID_HOST_PTR";
	case -38: return "CL_INVALID_MEM_OBJECT";
	case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
	case -40: return "CL_INVALID_IMAGE_SIZE";
	case -41: return "CL_INVALID_SAMPLER";
	case -42: return "CL_INVALID_BINARY";
	case -43: return "CL_INVALID_BUILD_OPTIONS";
	case -44: return "CL_INVALID_PROGRAM";
	case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
	case -46: return "CL_INVALID_KERNEL_NAME";
	case -47: return "CL_INVALID_KERNEL_DEFINITION";
	case -48: return "CL_INVALID_KERNEL";
	case -49: return "CL_INVALID_ARG_INDEX";
	case -50: return "CL_INVALID_ARG_VALUE";
	case -51: return "CL_INVALID_ARG_SIZE";
	case -52: return "CL_INVALID_KERNEL_ARGS";
	case -53: return "CL_INVALID_WORK_DIMENSION";
	case -54: return "CL_INVALID_WORK_GROUP_SIZE";
	case -55: return "CL_INVALID_WORK_ITEM_SIZE";
	case -56: return "CL_INVALID_GLOBAL_OFFSET";
	case -57: return "CL_INVALID_EVENT_WAIT_LIST";
	case -58: return "CL_INVALID_EVENT";
	case -59: return "CL_INVALID_OPERATION";
	case -60: return "CL_INVALID_GL_OBJECT";
	case -61: return "CL_INVALID_BUFFER_SIZE";
	case -62: return "CL_INVALID_MIP_LEVEL";
	case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
	case -64: return "CL_INVALID_PROPERTY";
	case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
	case -66: return "CL_INVALID_COMPILER_OPTIONS";
	case -67: return "CL_INVALID_LINKER_OPTIONS";
	case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
		// codes added by OpenCL 2.x; numeric literals are used so the table
		// also compiles against older headers that lack these macros
	case -69: return "CL_INVALID_PIPE_SIZE";
	case -70: return "CL_INVALID_DEVICE_QUEUE";
	case -71: return "CL_INVALID_SPEC_ID";
	case -72: return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";

		// extension errors
	case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
	case -1001: return "CL_PLATFORM_NOT_FOUND_KHR";
	case -1002: return "CL_INVALID_D3D10_DEVICE_KHR";
	case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR";
	case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
	case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
	default: return "Unknown OpenCL error";
	}
}

void CheckError(cl_int error) {
	if (error != CL_SUCCESS) {
		std::cerr << "Error occurred at file: " << __FILE__ << ", line: " << __LINE__<<" with status "<< getErrorString(error) << std::endl;
		exit(1);
	}
}

int main() {
    // 初始化输入矩阵
    float* A = new float[N * N];
    float* B = new float[N * N];
    for (size_t i = 0; i < N * N; i++)
    {
        A[i] = 1.0f;
        B[i] = 2.0f;
    }

    cl_platform_id platform;
    cl_int status;

    status=clGetPlatformIDs(1, &platform, NULL);
	CheckError(status);
    cl_device_id device;
	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
	CheckError(status);
    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
	CheckError(status);
    cl_command_queue queue = clCreateCommandQueueWithProperties(context, device, 0, &status);
	CheckError(status);

    // 创建OpenCL内存缓冲区
    cl_mem bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status);
    cl_mem bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status);
    cl_mem bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, size, NULL, &status);
	CheckError(status);

    // 将输入数据传输到OpenCL缓冲区
	status = clEnqueueWriteBuffer(queue, bufferA, CL_TRUE, 0, size, A, 0, NULL, NULL);
	status = clEnqueueWriteBuffer(queue, bufferB, CL_TRUE, 0, size, B, 0, NULL, NULL);
	CheckError(status);

    const char* source = "__kernel void add_matrices(__global const float* A, __global const float* B, __global float* C) { int id = get_global_id(0); C[id] = A[id] + B[id]; }";
    cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, &status);
	CheckError(status);
	status = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
	CheckError(status);
    cl_kernel kernel = clCreateKernel(program, "add_matrices", &status);
	CheckError(status);

    // 设置OpenCL内核参数
	status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA);
	status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB);
	status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC);
	CheckError(status);

    // 启动内核
    size_t globalWorkSize[2] = { N, N };
	status = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL);
	CheckError(status);

    // 读取结果数据
	status = clEnqueueReadBuffer(queue, bufferC, CL_TRUE, 0, size, A, 0, NULL, NULL);
	CheckError(status);

    // 清理OpenCL资源
	status = clReleaseMemObject(bufferA);
	status = clReleaseMemObject(bufferB);
	status = clReleaseMemObject(bufferC);
	status = clReleaseProgram(program);
	status = clReleaseKernel(kernel);
	status = clReleaseCommandQueue(queue);
	status = clReleaseContext(context);
	CheckError(status);

    // 打印结果
    std::cout << "Result: " << A[1023] << std::endl;

    delete[] A;
    delete[] B;

    return 0;
}

保留了之前逻辑的同时,此处的重要区别是,引入了void CheckError(cl_int error)函数,用于校验每个环节执行结束后的状态值。

整体流程中需要注意的点

  • 在本地调试阶段,建议监控每一个可能出错位置的状态值
    • 状态值是OpenCL反馈错误的唯一方式,如果没有状态值,排查错误几乎是不可能进行的;状态值可以作为排除错误的重要参考
    • 通常,OpenCL相关的错误都是可以稳定复现的;如果多次计算相同数据时,状态值出错的位置并不相同,请优先考虑其它环节是否存在比较严重的内存泄漏
    • 每个返回状态值的函数,都会有关于状态值对应的原因的描述,因此,当返回状态值有误,且无法凭借状态值信息直接得知出错原因时,可以查看该函数官方描述中关于该状态值的信息,以排除当前可能存在的错误
  • 程序退出时记得释放系统资源
    • OpenCL中利用Create创建的资源都需要释放,例如,此处的MemObject、Program、Kernel、CommandQueue、Context。创建的资源对系统而言是比较大的负担,当你在某个程序中多次创建资源而不释放时,系统内存占用将会显著增加直至无法创建新的资源。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值