Docker工具边学边用[3]: 在ubuntu22.04系统中搭建opencl运行环境

布里吉

已于 2024-06-28 11:45:10 修改

阅读量469

点赞数 10

文章标签： docker 容器运维 gpu算力

于 2024-06-16 16:41:30 首次发布

本文链接：https://blog.csdn.net/weixin_39782251/article/details/139721874

版权

1. 背景及需求说明

由于项目调试的需要，笔者需要临时在docker镜像中，对部分高耗时算子进行OpenCL加速。悲催的是，从docker hub官网拉下来的ubuntu22.04镜像中尚未包含相关模块，上述愿望直接落空。为了解决上述问题，笔者特地翻阅了多篇博文，并将主要流程及相关测试代码整理如下。

需要补充说明的是，本文仅在NVIDIA GPU及Intel核显设备上进行了测试及代码验证，其余计算设备（如AMD系列显卡）并未涉及。

2. 步骤说明

* step-1: ubuntu2204宿主机上，安装nvidia-container-runtime库

* step-2: 创建docker file文件，并构建镜像

* step-3: 运行镜像

* step-4: demo代码验证

3. 具体实现细节

* step-1: nvidia-container-runtime

# Host-side setup: register NVIDIA's apt repository and install the
# nvidia-container-runtime package on the Ubuntu host.

# curl is used below to download the repository key and package list.
sudo apt install curl
# Add NVIDIA's package-signing key to apt's trusted keys.
# NOTE(review): apt-key is deprecated on Ubuntu 22.04 (it still works but
# prints a warning) — consider a dedicated keyring plus "signed-by" instead.
curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey | \
  sudo apt-key add -
# Build the distribution tag from /etc/os-release (ID followed by VERSION_ID).
# The file is sourced inside $(...) so its variables do not leak into the
# current shell.
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
# Download the apt source list for this distribution and install it under
# /etc/apt/sources.list.d (tee is used because the redirect needs root).
curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | \
  sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
# Refresh the package index so the newly added repository is visible.
sudo apt update
sudo apt install nvidia-container-runtime

* step-2: dockerfile

# Ubuntu 22.04 image with the OpenCL ICD loader, headers and a C++ toolchain,
# prepared so the NVIDIA OpenCL driver can be used from inside the container.
FROM ubuntu:22.04

# Build-time only: keeps apt from prompting during package installation.
ARG DEBIAN_FRONTEND=noninteractive

# Toolchain, archive utilities and the OpenCL loader/headers/clinfo.
# The apt package lists are removed in the same layer to keep the image small;
# run "apt-get update" again before installing anything else in a container.
RUN apt-get update && apt-get -y upgrade \
  && apt-get install -y \
    g++ \
    apt-utils \
    unzip \
    tar \
    curl \
    xz-utils \
    ocl-icd-libopencl1 \
    ocl-icd-opencl-dev \
    opencl-headers \
    clinfo \
  && rm -rf /var/lib/apt/lists/*

# Register the NVIDIA driver library with the OpenCL ICD loader.
# NOTE(review): libnvidia-opencl.so.1 itself is not installed here; it is
# presumably mounted into the container by the NVIDIA container runtime.
RUN mkdir -p /etc/OpenCL/vendors && \
    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd

# Settings read by the NVIDIA container runtime: expose all GPUs and request
# the compute and utility driver capabilities.
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

* step-3: 镜像的构建指令如下：

$ docker build -t nvidia-opencl .

构建完成后的镜像信息，可通过"docker images"指令查看，结果如下：

* step-4: demo验证

#include <CL/cl.h>
#include <iostream>
#include <string>
#include <vector>

// OpenCL C source of the demo kernel: each work-item adds one pair of elements,
// result[gid] = a[gid] + b[gid]. Written as a raw string with real newlines so
// that line numbers in a compiler build log point at the offending statement.
const std::string kernel_str = R"CLC(
__kernel void vector_add(global const float *a,
                         global const float *b,
                         global float *result) {
  int gid = get_global_id(0);
  result[gid] = a[gid] + b[gid];
}
)CLC";

int main(){

  int err = 0;
  size_t size = 0;
  uint32_t platform_num = 0;
  cl_platform_id platform{};
  cl_context ctx{};
  cl_command_queue cmd_queue{};
  cl_device_id device{};
  std::string param_val;
  cl_program program;
  cl_kernel add_kernel;
  
  // step-1: platforms
  err = clGetPlatformIDs(0, nullptr, &platform_num);
  std::cout << "The num of platform is: " << platform_num << std::endl;
  
  std::vector<cl_platform_id> all_platforms;
  all_platforms.reserve(platform_num);
  err = clGetPlatformIDs(platform_num, all_platforms.data(), nullptr);
  
  for (int idx = 0; idx < platform_num; ++idx) {

    // get the size of current platform name
    err = clGetPlatformInfo(
        all_platforms[idx], CL_PLATFORM_NAME, 0, nullptr, &size);
    param_val.resize(size);

    // get the string of current platform name
    err = clGetPlatformInfo(all_platforms[idx],
                                       CL_PLATFORM_NAME,
                                       size,
                                       const_cast<char *>(param_val.data()),
                                       nullptr);

    std::cout << "The platform name is: " <<  param_val << std::endl;

    // NOTE: for 'CL_PLATFORM_NAME', you may get the following three items:
    // (1) NVIDIA CUDA / (2) Intel(R) OpenCL Graphics / (3) ARM Platform.
    // We chose intel graphics or arm firstly.
    if (param_val.find("Intel(R)") != std::string::npos ||
        param_val.find("ARM Platform") != std::string::npos) {

      // NOTE: for Intel and arm device, they are unable to exist meanwhile.
      platform = all_platforms[idx];
      break;
    }

    if (param_val.find("NVIDIA CUDA") != std::string::npos) {
      platform = all_platforms[idx];
    }

    // if the expected platform cannot still be found, print the error!
    if (platform == nullptr) {
      std::cout <<
          "The expected OpenCL platform (Intel, Arm or Nvidia) cannot be found "
          "in your device. The cl stream cannot be created now, please "
          "reconfirm it again!" << std::endl;
    }
  }
  
  // step-2: devices
  uint32_t device_num = 0;

  std::vector<cl_device_id> all_devices;

  err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, nullptr, &device_num);
  if (0 == device_num) {

    std::cout << "The expected OpenCL devices cannot be found in your device, "
                 "please reconfirm it again!" << std::endl;
  }
  std::cout << "The device num is: " << device_num << std::endl;
  
  // get the all the devices id
  all_devices.reserve(device_num);
  err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, device_num, all_devices.data(), nullptr);

  for (auto idx = 0; idx < device_num; ++idx) {

    err = clGetDeviceInfo(all_devices[idx], CL_DEVICE_NAME, 0, nullptr, &size);
    param_val.resize(size);

    // choose the first compute device by default
    char buffer[100];
    err = clGetDeviceInfo(
        all_devices[idx], CL_DEVICE_NAME, size, param_val.data(), nullptr);
    std::cout << "The OpenCL compute device is: "<< param_val << std::endl;
  }
  device = all_devices[0];
  
  // step-3: context
  ctx = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
  if(err != CL_SUCCESS)
  {	
    std::cout << "Unable to create the context!" << std::endl;
  }
  
  // step-4: cmd queue
  cmd_queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &err);
  if(err != CL_SUCCESS){
    std::cout << "Unable to create the cmd queue!" << std::endl;
  }
  
  // step-5: create program
  const char *kernels_char = kernel_str.c_str();
  program = clCreateProgramWithSource(ctx,1,&kernels_char, nullptr, &err);
  if(CL_SUCCESS != err){
    std::cout << "Unable to create the program with source!" << std::endl;
  }else{
    std::cout << "Create the program successfully!"  << std::endl;
  }
  	
  // step-6: build program
  err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
  if(CL_SUCCESS != err){
    
    std::cout << "The program isn't built successfully!" << std::endl;

    size_t len = 0;
    cl_int ret = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0,
                                       nullptr, &len);
    std::cout << "The len of build info is: " << len << ", the return value is: " << ret << std::endl;;

    std::vector<char> buffer(len);
    ret = clGetProgramBuildInfo(
         program, device, CL_PROGRAM_BUILD_LOG, len, buffer.data(), nullptr);
    std::string build_log(buffer.begin(), buffer.end());

    if (ret != CL_SUCCESS) {
       std::cerr << "Failed to get build log" << std::endl;
    }

    std::cout << "Build log:\n" << build_log << std::endl;
  }else{
    std::cout << "Build the program successfully!"  << std::endl;
  }
  
  // step-7: create kernel
  const std::string kernel_name = "vector_add";
  add_kernel = clCreateKernel(program, kernel_name.c_str(), &err);
  if(CL_SUCCESS != err){
    std::cout << "Unable to create the kernel!" << std::endl;
  }else{
    std::cout << "create the kernel successfully!"  << std::endl;
  }
  
  // step-8: cl_mem
  size_t length = 10;
  std::vector<float> a(length, 1.f);
  std::vector<float> b(length, 2.f);
  std::vector<float> c(length, 0.f);
  
  // step-9: create buffer
  cl_mem a_mem = clCreateBuffer(ctx, 
  				CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
  				sizeof(float) * length,
  				nullptr,
  				nullptr
  				);
  cl_mem b_mem = clCreateBuffer(ctx, 
  				CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
  				sizeof(float) * length,
  				nullptr,
  				nullptr
  				);
  cl_mem c_mem = clCreateBuffer(ctx, 
  				CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
  				sizeof(float) * length,
  				nullptr,
  				nullptr
  				);
  
  // step-10: write buffer	
  err = clEnqueueWriteBuffer(cmd_queue, 
			     a_mem, 
			     CL_TRUE, 
			     0, 
			     sizeof(float) * length, 
			     a.data(), 
			     0, 
			     nullptr, 
			     nullptr
			     );
  err |= clEnqueueWriteBuffer(cmd_queue, 
			     b_mem, 
			     CL_TRUE, 
			     0, 
			     sizeof(float) * length, 
			     b.data(), 
			     0, 
			     nullptr, 
			     nullptr
			     );
  clFinish(cmd_queue);
  if(CL_SUCCESS != err){
    std::cout << "Unable to write data into cl_mem!" << std::endl;
  }else{
    std::cout << "Write data into cl_mem successfully!" << std::endl;
  }
  
  // step-11: set kernel args
  err  = clSetKernelArg(add_kernel, 0, sizeof(cl_mem), &a_mem);
  err |= clSetKernelArg(add_kernel, 1, sizeof(cl_mem), &b_mem);
  err |= clSetKernelArg(add_kernel, 2, sizeof(cl_mem), &c_mem);  
  if(CL_SUCCESS != err){
    std::cout << "Unable to set kernel args!" << std::endl;
  }else{
    std::cout << "Set the args successfully!" << std::endl;
  }
  
  // step-12: run the kernel
  size_t global_work_size[1] = {length};
  int work_dim = 1;
  
  err = clEnqueueNDRangeKernel( cmd_queue,
  				add_kernel,
  				work_dim,
  				nullptr,
  				global_work_size,
  				nullptr,
  				0,
  				nullptr,
  				nullptr
  			      );
  clFinish(cmd_queue);
  if(CL_SUCCESS != err){
    std::cout << "Unable to Enqueue NDRange kernel!" << std::endl;
  }else{
    std::cout << "Enqueue NDRange successfully!" << std::endl;
  }
  
  // step-13: read the data from cl_mem
  err = clEnqueueReadBuffer(	cmd_queue, 
  				c_mem, 
  				CL_TRUE, 
  				0, 
  				sizeof(float) * length, 
  				c.data(), 
  				0, 
  				nullptr, 
  				nullptr);
  clFinish(cmd_queue);
  	
  std::cout << "1 + 2 = ";
  for(const auto &ele : c){
  	std::cout  << ele << ", ";
  }
  std::cout << std::endl;

}

上述测试代码最终可按照如下指令进行编译及运行：

$ g++ -o cl_env_test env_test.cpp -lOpenCL    # 构建
$ ./cl_env_test                               # 测试运行

4. 最终效果

上述构建完成的镜像最终可按照如下指令运行：

$ docker run -it --rm --gpus all nvidia-opencl

而后输入指令"clinfo"，以检索opencl设备，检索结果如下：

demo程序的输出结果如下：

布里吉

关注

10
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
Docker工具边学边用[3]: 在ubuntu22.04系统中搭建opencl运行环境

本文给出了，在docker+ubuntu2204环境中，配置opencl环境的完整教程，方便调试opencl算子
复制链接

扫一扫