对opencl helloworld代码的修正

最新推荐文章于 2021-09-05 10:54:31 发布

directx3d_beginner

最新推荐文章于 2021-09-05 10:54:31 发布

阅读量214

点赞数

分类专栏： opencl

本文链接：https://blog.csdn.net/directx3d_beginner/article/details/80683456

版权

opencl 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

由于原来的代码比较乱，数组也容易越界，故重新封装了一个类，并改用 std::vector，代码如下：

test.cl

// Element-wise addition of two float buffers: result[i] = a[i] + b[i].
// Each work-item processes the single element selected by its global id in
// dimension 0. The kernel does no bounds checking of its own.
__kernel void hello_kernel(__global const float* a,
                           __global const float* b,
                           __global float* result)
{
    const int idx = get_global_id(0);
    const float sum = a[idx] + b[idx];
    result[idx] = sum;
}

头文件

#pragma once
#include <iostream>
#include <fstream>
#include <sstream>
#include <time.h>
#include <vector>

#ifdef __APPLE__
#include <opencl/cl.h>
#else
#include <CL/cl.h>
#endif

//读写内存
// Access mode requested for a buffer.
// Kept as an unscoped enum so its values can be stored in and compared with int.
enum readWrite
{
    // The buffer is only read (value 0).
    readOnly,
    // The buffer is read and written (value 1).
    rw
};

// Small helper that runs one OpenCL kernel over a set of float vectors.
//
// The object stores a copy of the input vectors together with the OpenCL
// handles (context, command queue, program, kernel) that the destructor
// releases.
class myOpenCL
{
public:
    // strOpenCLFileName    : path of the OpenCL source file to load
    // strOpenCLKernalEntry : name of the kernel function inside that file
    // objectSize           : number of vectors (kernel arguments)
    // numberOfEachObject   : number of values in each vector
    // sizeOfEachUnit       : size in bytes of one value
    // inputVec2            : the vectors themselves; the last one receives the result
    myOpenCL(std::string strOpenCLFileName,
        std::string strOpenCLKernalEntry,
        int objectSize,
        int numberOfEachObject,
        int sizeOfEachUnit,
        std::vector<std::vector<float>> inputVec2);
    ~myOpenCL();

    // The destructor releases the raw OpenCL handles held by this object, so
    // a member-wise copy would release every handle twice. Copying is
    // therefore disabled.
    myOpenCL(const myOpenCL&) = delete;
    myOpenCL& operator=(const myOpenCL&) = delete;

public:
    // Runs the whole pipeline.
    void process();
    // Returns the result (a copy of the last stored vector).
    std::vector<float> getResult();
    // Creates a context on the first platform, trying a GPU device first and
    // falling back to a CPU device.
    cl_context createContext();
    // Picks the first device of the context, stores it in `device` and
    // creates a command queue on it.
    cl_command_queue createCommandQueue(cl_context context, cl_device_id & device);
    // Loads the kernel source from disk, then creates and builds a program object.
    cl_program createProgram( const char* fileName);
    // Binds one buffer to the kernel argument with index `id`.
    cl_int setKernelParameter(int id, cl_mem theData);
    // Enqueues the kernel for execution on the command queue.
    cl_int setKernalQueue(size_t* globalWorkSize, size_t* localWorkSize);
    // Reads the result back from a device buffer into `result`.
    cl_int readResult(cl_mem memObject, float * result);

public:
    // Returns the stored context handle.
    cl_context getContext();
private:
    std::string _strOpenCLFileName;     // file name of the OpenCL source
    std::string _strOpenCLKernalEntry;  // kernel entry point name
    cl_context _theContext;             // OpenCL context
    cl_command_queue _commandQueue;     // command queue
    cl_device_id _device;               // device id
    cl_program _theProgram;             // program object
    cl_kernel _theKernel;               // kernel object
    int _objectSize;                    // number of vectors
    int _numberOfEachObject;            // number of values per vector
    int _sizeOfEachUnit;                // size of one value in bytes
    std::vector<std::vector<float>> _inputVec2; // host-side vectors (inputs + result)

};

实现文件

#include "myOpenCL.h"
// Stores the configuration and the host data; no OpenCL call is made here.
// Every handle, including the device id, starts out as NULL so that later
// code can reliably test whether it has been created.
myOpenCL::myOpenCL(std::string strOpenCLFileName,
    std::string strOpenCLKernalEntry,
    int objectSize,
    int numberOfEachObject,
    int sizeOfEachUnit,
    std::vector<std::vector<float>> inputVec2)
    : _strOpenCLFileName(strOpenCLFileName),
      _strOpenCLKernalEntry(strOpenCLKernalEntry),
      _theContext(NULL),
      _commandQueue(NULL),
      _device(NULL),
      _theProgram(NULL),
      _theKernel(NULL),
      _objectSize(objectSize),
      _numberOfEachObject(numberOfEachObject),
      _sizeOfEachUnit(sizeOfEachUnit),
      _inputVec2()
{
    // inputVec2 is already a private by-value copy; swapping takes over its
    // storage instead of copying the (potentially large) data a second time.
    _inputVec2.swap(inputVec2);
}

// Drops the host data and releases every OpenCL handle that is non-null,
// in the order: command queue, kernel, program, context.
myOpenCL::~myOpenCL()
{
    _inputVec2.clear();

    if (_commandQueue != NULL)
        clReleaseCommandQueue(_commandQueue);

    if (_theKernel != NULL)
        clReleaseKernel(_theKernel);

    if (_theProgram != NULL)
        clReleaseProgram(_theProgram);

    if (_theContext != NULL)
        clReleaseContext(_theContext);
}

// Creates an OpenCL context on the first available platform.
// A GPU context is tried first; if that fails a CPU context is tried.
// Returns NULL (after reporting on std::cerr) when no platform exists or
// neither context type can be created.
cl_context myOpenCL::createContext()
{
    cl_platform_id firstPlatformId = 0;
    cl_uint numPlatforms = 0;
    // Only the first platform is requested.
    cl_int errNum = clGetPlatformIDs(1, &firstPlatformId, &numPlatforms);
    if (errNum != CL_SUCCESS || numPlatforms == 0)
    {
        std::cerr << "Failed to find any OpenCL platform (error " << errNum << ")" << std::endl;
        return NULL;
    }

    cl_context_properties contextProperties[] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)firstPlatformId,
        0
    };

    cl_context context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum);
    if (errNum != CL_SUCCESS)
    {
        // No usable GPU: fall back to a CPU device.
        context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_CPU, NULL, NULL, &errNum);
        if (errNum != CL_SUCCESS)
        {
            std::cerr << "Failed to create an OpenCL GPU or CPU context (error " << errNum << ")" << std::endl;
            return NULL;
        }
    }
    return context;
}
// Picks the first device of `context` and creates a command queue on it.
// The queue is what kernels are enqueued on and results are read back through.
//
// context : context whose device list is queried
// device  : receives the chosen device; left untouched if the device list
//           cannot be obtained
// Returns the new queue, or NULL (after reporting on std::cerr) on failure.
cl_command_queue myOpenCL::createCommandQueue(cl_context context, cl_device_id & device)
{
    // First call only asks how many bytes the device list needs.
    size_t deviceBufferSize = 0;
    cl_int errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
    if (errNum != CL_SUCCESS || deviceBufferSize < sizeof(cl_device_id))
    {
        std::cerr << "Failed to query the devices of the OpenCL context (error " << errNum << ")" << std::endl;
        return NULL;
    }

    // The vector owns the device list, so no manual delete[] is needed on any path.
    std::vector<cl_device_id> devices(deviceBufferSize / sizeof(cl_device_id));
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices.size() * sizeof(cl_device_id), &devices[0], NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to read the device list of the OpenCL context (error " << errNum << ")" << std::endl;
        return NULL;
    }

    // Only the first device is used.
    device = devices[0];
    cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, &errNum);
    if (commandQueue == NULL || errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to create an OpenCL command queue (error " << errNum << ")" << std::endl;
        return NULL;
    }
    return commandQueue;
}

// Loads kernel source text from `fileName`, creates a program object in the
// stored context and builds it.
//
// Returns the built program, or NULL when the file cannot be opened, the
// program object cannot be created, or the build fails. On a build failure
// the build log of the program's first device is written to std::cerr and
// the program object is released before returning.
cl_program myOpenCL::createProgram( const char* fileName)
{
    if (fileName == NULL)
    {
        std::cerr << "No OpenCL source file name given" << std::endl;
        return NULL;
    }

    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "不能打开文件" << fileName << std::endl;
        return NULL;
    }

    // Slurp the whole file into one string.
    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    const std::string srcStdStr = oss.str();
    const char * srcStr = srcStdStr.c_str();

    // Create the program object from the source text.
    cl_int errNum = CL_SUCCESS;
    cl_program program = clCreateProgramWithSource(_theContext, 1, &srcStr, NULL, &errNum);
    if (program == NULL || errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to create an OpenCL program from " << fileName << " (error " << errNum << ")" << std::endl;
        return NULL;
    }

    // Compile the kernel source for all devices of the context.
    errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to build the OpenCL program " << fileName << " (error " << errNum << ")" << std::endl;

        // Ask the program which devices it targets so that the build log can
        // be fetched without relying on any other member being set.
        size_t devicesBytes = 0;
        if (clGetProgramInfo(program, CL_PROGRAM_DEVICES, 0, NULL, &devicesBytes) == CL_SUCCESS
            && devicesBytes >= sizeof(cl_device_id))
        {
            std::vector<cl_device_id> devices(devicesBytes / sizeof(cl_device_id));
            if (clGetProgramInfo(program, CL_PROGRAM_DEVICES, devices.size() * sizeof(cl_device_id), &devices[0], NULL) == CL_SUCCESS)
            {
                size_t logSize = 0;
                if (clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize) == CL_SUCCESS
                    && logSize > 0)
                {
                    // One extra zero byte guarantees termination when streaming the log.
                    std::vector<char> buildLog(logSize + 1, '\0');
                    if (clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, logSize, &buildLog[0], NULL) == CL_SUCCESS)
                    {
                        std::cerr << &buildLog[0] << std::endl;
                    }
                }
            }
        }

        clReleaseProgram(program);
        return NULL;
    }
    return program;
}

// Accessor for the stored OpenCL context handle.
cl_context myOpenCL::getContext()
{
    return this->_theContext;
}

// Binds the buffer `theData` to the kernel argument with index `id` and
// returns the status code of clSetKernelArg.
cl_int myOpenCL::setKernelParameter( int id, cl_mem theData)
{
    return clSetKernelArg(_theKernel, id, sizeof(theData), &theData);
}

// Enqueues the stored kernel on the stored command queue as a
// one-dimensional range with the given global and local work sizes.
// Returns the status code of clEnqueueNDRangeKernel.
cl_int myOpenCL::setKernalQueue(size_t* globalWorkSize, size_t* localWorkSize)
{
    const cl_uint workDimensions = 1;
    return clEnqueueNDRangeKernel(_commandQueue, _theKernel, workDimensions, NULL,
        globalWorkSize, localWorkSize, 0, NULL, NULL);
}
// Blocking read of the device buffer `memObject` into the host memory at
// `result`. The number of bytes copied is (values per vector) * (bytes per
// value); `result` must point to at least that many bytes.
// Returns the status code of clEnqueueReadBuffer.
cl_int myOpenCL::readResult(cl_mem memObject, float * result)
{
    // Multiply in size_t: the product of two ints can overflow int before it
    // is converted to the size_t parameter.
    const size_t byteCount = static_cast<size_t>(_numberOfEachObject) * static_cast<size_t>(_sizeOfEachUnit);
    cl_int errNum = clEnqueueReadBuffer(_commandQueue, memObject, CL_TRUE, 0, byteCount, result, 0, NULL, NULL);
    return errNum;
}

//处理全过程
void myOpenCL::process()
{
   _theContext = this->createContext();
   _commandQueue = this->createCommandQueue(_theContext, _device);
   _theProgram = this->createProgram(_strOpenCLFileName.c_str());
   //创建opencl内核
   _theKernel = clCreateKernel(_theProgram, _strOpenCLKernalEntry.c_str(), NULL);

   //读写vector,用以在分配内存时判断只读还是读写
   //设定前几个只读，最后一个读写
   std::vector<int> readWriteVector;
   readWriteVector.clear();
   readWriteVector.resize(_objectSize);
   for (size_t i = 0; i < _objectSize - 1; i++)
   {
       readWriteVector[i] = readWrite::readOnly;
   }
   readWriteVector[_objectSize - 1] = readWrite::rw;

   std::vector<cl_mem> memObjectVector;
   memObjectVector.clear();
   memObjectVector.resize(_objectSize);
   for (size_t i = 0; i < _objectSize; i++)
   {
       memObjectVector[i] = 0;
   }

   cl_context theContext = this->getContext();
   //先读后写分配内存
   for (size_t i = 0; i < _objectSize; i++)
   {
       int readW = readWriteVector[i];
       if (readW == readWrite::readOnly)
       {
           memObjectVector[i] = clCreateBuffer(theContext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
               sizeof(float) * _numberOfEachObject, &_inputVec2[i][0], NULL);
       }
       else if (readW == readWrite::rw)
       {
           memObjectVector[i] = clCreateBuffer(theContext, CL_MEM_READ_WRITE, _sizeOfEachUnit * _numberOfEachObject, NULL, NULL);
       }
   }

   //建立内核参数
   for (size_t i = 0; i < _objectSize; i++)
   {
       this->setKernelParameter(i, memObjectVector[i]);
   }
   //使用命令队列使将在设备上执行的内核排队
   size_t globalWorkSize[1] = { _numberOfEachObject };
   size_t localWorkSize[1] = { 1 };
   this->setKernalQueue(globalWorkSize, localWorkSize);
   //从内核读回结果
   this->readResult(memObjectVector[_objectSize - 1], &_inputVec2[_objectSize - 1][0]);
   memObjectVector.clear();
}

// Returns a copy of the last stored vector (index objectSize - 1), which is
// the vector process() reads the kernel output into.
// Returns an empty vector when the object count is not positive or exceeds
// the number of stored vectors, instead of indexing out of bounds.
std::vector<float> myOpenCL::getResult()
{
    if (_objectSize < 1 || static_cast<size_t>(_objectSize) > _inputVec2.size())
    {
        return std::vector<float>();
    }
    return _inputVec2[static_cast<size_t>(_objectSize) - 1];
}

调用文件

#include "myOpenCL.h"

// Number of float values in each vector.
const int ARRAY_SIZE = 1000000;

// Demo driver: builds three vectors of ARRAY_SIZE floats (vector 0 holds i,
// vector 1 holds 2*i, vector 2 stays zero-filled), hands them to myOpenCL
// with the "hello_kernel" entry point of test.cl, runs it and prints the
// returned vector, ten values per line.
int main(int argc, char ** argv)
{
    const std::string kernelFile = "test.cl";
    const std::string kernelEntry = "hello_kernel";
    const int objectSize = 3;
    const int numberOfEachObject = ARRAY_SIZE;

    // objectSize vectors, each with numberOfEachObject zero-initialized values.
    std::vector<std::vector<float>> computeVector(objectSize, std::vector<float>(numberOfEachObject));

    // Fill the first two vectors.
    std::vector<float>& firstInput = computeVector[0];
    std::vector<float>& secondInput = computeVector[1];
    for (size_t idx = 0; idx < firstInput.size(); ++idx)
    {
        firstInput[idx] = static_cast<float>(idx);
        secondInput[idx] = static_cast<float>(idx * 2);
    }

    myOpenCL theOpenCL(kernelFile, kernelEntry, objectSize, numberOfEachObject, sizeof(float), computeVector);
    theOpenCL.process();

    // Print the result: a line break before every tenth value (including the
    // very first), each value followed by a comma.
    const std::vector<float> resultVec = theOpenCL.getResult();
    size_t printed = 0;
    for (float value : resultVec)
    {
        if (printed % 10 == 0)
        {
            std::cout << std::endl;
        }
        std::cout << value << ",";
        ++printed;
    }

    return 0;
}