例程介绍
OpenCL的Hello World,简单演示如何dispatch一个kernel到DSP,并读回数据。
例程源码
Host端
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
#include <iostream>
#include <fstream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <signal.h>
#include "ocl_util.h"
#ifdef _TI_RTOS
#include "kernel.dsp_h"
#include "../rtos_main.c"
#endif
using namespace cl;
using namespace std;
// Byte count of the host array and of the device buffer; also passed as the
// global work size of the kernel launch (2^23).
constexpr int size = 1 << 23;

// Work-group size passed to the kernel launch (2^14 work-items per group).
constexpr int wgsize = 1 << 14;
#ifdef _TI_RTOS
void ocl_main(UArg arg0, UArg arg1)
{
// int argc = (int) arg0;
// char **argv = (char **) arg1;
cl_char *ary = (cl_char *) __malloc_ddr(size);
assert(ary != nullptr);
#else
#define RETURN(x) return x
cl_char ary [size];
int main(int argc, char *argv[])
{
#endif
/*-------------------------------------------------------------------------
* Catch ctrl-c so we ensure that we call dtors and the dsp is reset properly
*------------------------------------------------------------------------*/
signal(SIGABRT, exit);
signal(SIGTERM, exit);
memset(ary, 0, size);
try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR); //创建基于CL_DEVICE_TYPE_ACCELERATOR(对应EVE,但AM5728仅有DSP)的context
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); //获取对应device
Buffer buf (context, CL_MEM_WRITE_ONLY, size); //基于该context创建一个只写的buffer
#ifndef _TI_RTOS
ifstream t("kernel.cl"); //读取cl函数文件
std::string kSrc((istreambuf_iterator<char>(t)),
istreambuf_iterator<char>()); //读入cl函数的内容
//模板化的Program创建方式
Program::Sources source(1, make_pair(kSrc.c_str(), kSrc.length()));
Program program = Program(context, source);
#else //不同的Program创建方式
Program::Binaries binary(1, make_pair(kernel_dsp_bin,
sizeof(kernel_dsp_bin)));
Program program = Program(context, devices, binary);
#endif
program.build(devices); //在devices上执行Program
CommandQueue Q (context, devices[0]); //创建对应该context下devices[0]的CommandQueue,一个CommandQueue对应一个device
auto devset = cl::make_kernel<Buffer&>(program, "devset"); //创建kernel,第二个参数对应cl函数名,<Buffer&>表示传入一个Buffer地址
EnqueueArgs eargs(Q, NDRange(size), NDRange(wgsize)); //设置参数,第二个参数为work-dim维的work-item的总数,第三个参数为work-dim维下组成一个work-group的work-item数目
//第一个参数传入EnqueueArgs对象,第二个参数传入cl函数所需的变量
devset(eargs, buf).wait(); // call the kernel and wait for completion
Q.enqueueReadBuffer(buf, CL_TRUE, 0, size, ary); //读被OpenCL设备处理后的buf数据,CL_TRUE表示阻塞读,0表示偏移数
}
catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
exit(-1);
}
for (int i = 0; i < size; ++i) assert(ary[i] == 'x');
#ifdef _TI_RTOS
__free_ddr(ary);
#endif
std::cout << "Done!" << std::endl;
RETURN(0);
}
OpenCL设备端
/* devset: each work-item stores the character 'x' into the element of buf
 * selected by its own global ID in dimension 0. */
kernel void devset(global char* buf)
{
    // get_global_id(0) is this work-item's global ID along dimension 0.
    const size_t gid = get_global_id(0);
    buf[gid] = 'x';
}