Docker工具边学边用[3]: 在ubuntu22.04系统中搭建opencl运行环境

1. 背景及需求说明

        由于项目调试的需要,笔者需要临时在docker镜像中,对部分高耗时算子进行OpenCL加速。悲催的是,从docker hub官网拉下来的ubuntu22.04镜像中尚未包含相关模块,上述愿望直接落空。为了解决上述问题,笔者特地翻阅了多篇博文,并将主要流程及相关测试代码整理如下。

        需补充说明的是,该文主要针对的是NVIDIA GPU及Intel核显设备进行的测试及代码验证,其余计算设备(如AMD系列显卡)在该文中并未涉及。

2. 步骤说明

* step-1: ubuntu2204宿主机上,安装nvidia-container-runtime库

* step-2: 创建docker file文件,并构建镜像

* step-3: 运行镜像

* step-4: demo代码验证

3. 具体实现细节
* step-1: nvidia-container-runtime
# Install curl first: it is needed to fetch the repository key and source list.
sudo apt install curl
# Import NVIDIA's GPG key for the nvidia-container-runtime apt repository.
# NOTE(review): `apt-key add` is deprecated on Ubuntu 22.04 -- the signed-by
# keyring approach is preferred nowadays; kept as-is to match the article.
curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey | \
  sudo apt-key add -
# Compose the distribution id (e.g. "ubuntu22.04") from /etc/os-release.
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
# Register the matching apt source list for nvidia-container-runtime.
curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | \
  sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
# Refresh the package index and install the runtime itself.
sudo apt update
sudo apt install nvidia-container-runtime
* step-2: dockerfile
# Base image: stock Ubuntu 22.04 from Docker Hub (ships no OpenCL components).
FROM ubuntu:22.04
# Suppress interactive prompts (tzdata etc.) during apt installs at build time.
ARG DEBIAN_FRONTEND=noninteractive
# Toolchain + OpenCL ICD loader, development headers, and the clinfo utility
# used later to verify that devices are visible inside the container.
RUN apt-get update && apt-get -y upgrade \
  && apt-get install -y \
    g++ \
    apt-utils \
    unzip \
    tar \
    curl \
    xz-utils \
    ocl-icd-libopencl1 \
    ocl-icd-opencl-dev \
    opencl-headers \
    clinfo \
    ;

# Register the NVIDIA OpenCL implementation with the ICD loader: the loader
# scans /etc/OpenCL/vendors/*.icd for library names to dlopen at runtime.
RUN mkdir -p /etc/OpenCL/vendors && \
    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
# Expose all host GPUs and the compute/utility driver capabilities when the
# container is started via the NVIDIA runtime (e.g. `docker run --gpus all`).
ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
* step-3: 镜像的构建指令如下:
$ docker build -t nvidia-opencl .
构建完成后的镜像信息,可通过"docker images"指令查看,结果如下:

* step-4: demo验证
#include <CL/cl.h>

#include <iostream>
#include <string>
#include <vector>

// OpenCL C source of an element-wise float vector addition kernel.
// Each work-item adds one pair of elements: result[gid] = a[gid] + b[gid].
// BUGFIX: the original literal ended with "};" -- a stray semicolon at file
// scope is not valid OpenCL C (C99-based) and some compilers reject it.
// A raw string literal also removes the fragile tab-padded line splicing.
const std::string kernel_str = R"CLC(
__kernel void vector_add(__global const float *a,
                         __global const float *b,
                         __global float *result) {
  int gid = get_global_id(0);
  result[gid] = a[gid] + b[gid];
}
)CLC";

int main(){

  int err = 0;
  size_t size = 0;
  uint32_t platform_num = 0;
  cl_platform_id platform{};
  cl_context ctx{};
  cl_command_queue cmd_queue{};
  cl_device_id device{};
  std::string param_val;
  cl_program program;
  cl_kernel add_kernel;
  
  // step-1: platforms
  err = clGetPlatformIDs(0, nullptr, &platform_num);
  std::cout << "The num of platform is: " << platform_num << std::endl;
  
  std::vector<cl_platform_id> all_platforms;
  all_platforms.reserve(platform_num);
  err = clGetPlatformIDs(platform_num, all_platforms.data(), nullptr);
  
  for (int idx = 0; idx < platform_num; ++idx) {

    // get the size of current platform name
    err = clGetPlatformInfo(
        all_platforms[idx], CL_PLATFORM_NAME, 0, nullptr, &size);
    param_val.resize(size);

    // get the string of current platform name
    err = clGetPlatformInfo(all_platforms[idx],
                                       CL_PLATFORM_NAME,
                                       size,
                                       const_cast<char *>(param_val.data()),
                                       nullptr);

    std::cout << "The platform name is: " <<  param_val << std::endl;

    // NOTE: for 'CL_PLATFORM_NAME', you may get the following three items:
    // (1) NVIDIA CUDA / (2) Intel(R) OpenCL Graphics / (3) ARM Platform.
    // We chose intel graphics or arm firstly.
    if (param_val.find("Intel(R)") != std::string::npos ||
        param_val.find("ARM Platform") != std::string::npos) {

      // NOTE: for Intel and arm device, they are unable to exist meanwhile.
      platform = all_platforms[idx];
      break;
    }

    if (param_val.find("NVIDIA CUDA") != std::string::npos) {
      platform = all_platforms[idx];
    }

    // if the expected platform cannot still be found, print the error!
    if (platform == nullptr) {
      std::cout <<
          "The expected OpenCL platform (Intel, Arm or Nvidia) cannot be found "
          "in your device. The cl stream cannot be created now, please "
          "reconfirm it again!" << std::endl;
    }
  }
  
  // step-2: devices
  uint32_t device_num = 0;

  std::vector<cl_device_id> all_devices;

  err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, nullptr, &device_num);
  if (0 == device_num) {

    std::cout << "The expected OpenCL devices cannot be found in your device, "
                 "please reconfirm it again!" << std::endl;
  }
  std::cout << "The device num is: " << device_num << std::endl;
  
  // get the all the devices id
  all_devices.reserve(device_num);
  err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, device_num, all_devices.data(), nullptr);

  for (auto idx = 0; idx < device_num; ++idx) {

    err = clGetDeviceInfo(all_devices[idx], CL_DEVICE_NAME, 0, nullptr, &size);
    param_val.resize(size);

    // choose the first compute device by default
    char buffer[100];
    err = clGetDeviceInfo(
        all_devices[idx], CL_DEVICE_NAME, size, param_val.data(), nullptr);
    std::cout << "The OpenCL compute device is: "<< param_val << std::endl;
  }
  device = all_devices[0];
  
  // step-3: context
  ctx = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
  if(err != CL_SUCCESS)
  {	
    std::cout << "Unable to create the context!" << std::endl;
  }
  
  // step-4: cmd queue
  cmd_queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &err);
  if(err != CL_SUCCESS){
    std::cout << "Unable to create the cmd queue!" << std::endl;
  }
  
  // step-5: create program
  const char *kernels_char = kernel_str.c_str();
  program = clCreateProgramWithSource(ctx,1,&kernels_char, nullptr, &err);
  if(CL_SUCCESS != err){
    std::cout << "Unable to create the program with source!" << std::endl;
  }else{
    std::cout << "Create the program successfully!"  << std::endl;
  }
  	
  // step-6: build program
  err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
  if(CL_SUCCESS != err){
    
    std::cout << "The program isn't built successfully!" << std::endl;

    size_t len = 0;
    cl_int ret = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0,
                                       nullptr, &len);
    std::cout << "The len of build info is: " << len << ", the return value is: " << ret << std::endl;;

    std::vector<char> buffer(len);
    ret = clGetProgramBuildInfo(
         program, device, CL_PROGRAM_BUILD_LOG, len, buffer.data(), nullptr);
    std::string build_log(buffer.begin(), buffer.end());

    if (ret != CL_SUCCESS) {
       std::cerr << "Failed to get build log" << std::endl;
    }

    std::cout << "Build log:\n" << build_log << std::endl;
  }else{
    std::cout << "Build the program successfully!"  << std::endl;
  }
  
  // step-7: create kernel
  const std::string kernel_name = "vector_add";
  add_kernel = clCreateKernel(program, kernel_name.c_str(), &err);
  if(CL_SUCCESS != err){
    std::cout << "Unable to create the kernel!" << std::endl;
  }else{
    std::cout << "create the kernel successfully!"  << std::endl;
  }
  
  // step-8: cl_mem
  size_t length = 10;
  std::vector<float> a(length, 1.f);
  std::vector<float> b(length, 2.f);
  std::vector<float> c(length, 0.f);
  
  // step-9: create buffer
  cl_mem a_mem = clCreateBuffer(ctx, 
  				CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
  				sizeof(float) * length,
  				nullptr,
  				nullptr
  				);
  cl_mem b_mem = clCreateBuffer(ctx, 
  				CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
  				sizeof(float) * length,
  				nullptr,
  				nullptr
  				);
  cl_mem c_mem = clCreateBuffer(ctx, 
  				CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
  				sizeof(float) * length,
  				nullptr,
  				nullptr
  				);
  
  // step-10: write buffer	
  err = clEnqueueWriteBuffer(cmd_queue, 
			     a_mem, 
			     CL_TRUE, 
			     0, 
			     sizeof(float) * length, 
			     a.data(), 
			     0, 
			     nullptr, 
			     nullptr
			     );
  err |= clEnqueueWriteBuffer(cmd_queue, 
			     b_mem, 
			     CL_TRUE, 
			     0, 
			     sizeof(float) * length, 
			     b.data(), 
			     0, 
			     nullptr, 
			     nullptr
			     );
  clFinish(cmd_queue);
  if(CL_SUCCESS != err){
    std::cout << "Unable to write data into cl_mem!" << std::endl;
  }else{
    std::cout << "Write data into cl_mem successfully!" << std::endl;
  }
  
  // step-11: set kernel args
  err  = clSetKernelArg(add_kernel, 0, sizeof(cl_mem), &a_mem);
  err |= clSetKernelArg(add_kernel, 1, sizeof(cl_mem), &b_mem);
  err |= clSetKernelArg(add_kernel, 2, sizeof(cl_mem), &c_mem);  
  if(CL_SUCCESS != err){
    std::cout << "Unable to set kernel args!" << std::endl;
  }else{
    std::cout << "Set the args successfully!" << std::endl;
  }
  
  // step-12: run the kernel
  size_t global_work_size[1] = {length};
  int work_dim = 1;
  
  err = clEnqueueNDRangeKernel( cmd_queue,
  				add_kernel,
  				work_dim,
  				nullptr,
  				global_work_size,
  				nullptr,
  				0,
  				nullptr,
  				nullptr
  			      );
  clFinish(cmd_queue);
  if(CL_SUCCESS != err){
    std::cout << "Unable to Enqueue NDRange kernel!" << std::endl;
  }else{
    std::cout << "Enqueue NDRange successfully!" << std::endl;
  }
  
  // step-13: read the data from cl_mem
  err = clEnqueueReadBuffer(	cmd_queue, 
  				c_mem, 
  				CL_TRUE, 
  				0, 
  				sizeof(float) * length, 
  				c.data(), 
  				0, 
  				nullptr, 
  				nullptr);
  clFinish(cmd_queue);
  	
  std::cout << "1 + 2 = ";
  for(const auto &ele : c){
  	std::cout  << ele << ", ";
  }
  std::cout << std::endl;

}	


        上述测试代码最终可按照如下指令进行编译及运行:
$ g++ -o cl_env_test env_test.cpp -lOpenCL    // 构建
$ ./cl_env_test                               // 测试运行
4. 最终效果

上述构建完成的镜像最终可按照如下指令运行:

$ docker run -it --rm --gpus all nvidia-opencl

而后输入指令"clinfo",以检索opencl设备,检索结果如下:

        demo程序的输出结果如下:

  • 10
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值