例程介绍
介绍如何在OpenCL设备端进行heap相关操作。
例程源码
Host端源码
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
#include <iostream>
#include <cstdlib>
#include "ocl_util.h"
#include "kernels.dsp_h"
using namespace cl;
using namespace std;
/*-----------------------------------------------------------------------------
* This example demonstrates how a heap may be created and used on the DSP
* for kernels that call legacy code that needs heap capability. There are dsp
* builtin functions to create and manipulate a user defined heap in both msmc
* and ddr. These heaps are persistent as long as the underlying memory for
* them is allocated. In this example we create OpenCL buffers that provide
* for the underlying memory store. The heaps are active and persistent from
* the time they are initialized until the buffers are deallocated.
*
* Additionally, the standard malloc, calloc, free, etc. calls are already
* supported on the dsp, but the underlying memory for that heap is limited.
* It currently is approximately 8MB. If your heap needs are under that size,
* and DDR allocation is ok for you, then the below mechanism is not needed.
*----------------------------------------------------------------------------*/
int main(int argc, char *argv[])
{
try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
devices.resize(1); // Only run on one device for demonstration
/*------------------------------------------------------------------------
* OpenCL Build the precompiled kernels
*-----------------------------------------------------------------------*/
Program::Binaries binary(1, make_pair(kernels_dsp_bin,sizeof(kernels_dsp_bin)));
Program program = Program(context, devices, binary);
program.build(devices);
/*------------------------------------------------------------------------
* Create a command queue and kernelfunctors for all kernels in our program
*-----------------------------------------------------------------------*/
CommandQueue Q(context, devices[0]);
KernelFunctor heap_init_ddr = Kernel(program, "heap_init_ddr") .bind(Q, NDRange(1), NDRange(1));
KernelFunctor heap_init_msmc = Kernel(program, "heap_init_msmc").bind(Q, NDRange(1), NDRange(1));
KernelFunctor alloc_and_free = Kernel(program, "alloc_and_free").bind(Q, NDRange(8), NDRange(1));
KernelFunctor alloc_only = Kernel(program, "alloc_only") .bind(Q, NDRange(8), NDRange(1));
/*------------------------------------------------------------------------
* Create the underlying memory store for the heaps with OpenCL Buffers
* Call kernels to initialize a DDR based and a MSMC based heap, the init
* step only needs to run once and one 1 core only. See the functor
* mapping above that defines the global size to be 1.
*-----------------------------------------------------------------------*/
int ddr_heap_size = 16 << 20; // 16MB
int ddr_alloc_size = 1024;
cout << "[host ] DDR heap size " << (ddr_heap_size/1024) << "k" << endl;
Buffer HeapDDR (context, CL_MEM_READ_WRITE, ddr_heap_size);
heap_init_ddr (HeapDDR, ddr_heap_size) .wait();
cout << "[host ] DDR heap init'ed" << endl;
int msmc_heap_size = 0;
int msmc_alloc_size = 0;
cl_ulong msmc_mem_size = 0;
devices[0].getInfo(CL_DEVICE_MSMC_MEM_SIZE_TI, &msmc_mem_size);
if(msmc_mem_size > 0)
{
msmc_heap_size = msmc_mem_size;
msmc_alloc_size = 1024;
cout << "[host ] MSMC heap size " << (msmc_heap_size/1024) << "k" << endl;
Buffer HeapMSMC(context, CL_MEM_READ_WRITE|CL_MEM_USE_MSMC_TI, msmc_heap_size);
heap_init_msmc(HeapMSMC, msmc_heap_size).wait();
cout << "[host ] MSMC heap init'ed" << endl;
}
else
{
cout << "[host ] MSMC unavailable" << endl;
}
cout << endl;
/*------------------------------------------------------------------------
* On each core alloc memory from both ddr and msmc and the free it.
*-----------------------------------------------------------------------*/
alloc_and_free(ddr_alloc_size, msmc_alloc_size).wait();
cout << endl;
/*------------------------------------------------------------------------
* On each core alloc memory from both ddr and msmc. Should see same memory
* from above alloc_and_free call. This time the memory is not freed.
*-----------------------------------------------------------------------*/
alloc_only(ddr_alloc_size, msmc_alloc_size).wait();
cout << endl;
/*------------------------------------------------------------------------
* Again, alloc on each core. Since the previous call did not free, these
* allocations should be in separate memory from the last set.
*-----------------------------------------------------------------------*/
alloc_only(ddr_alloc_size, msmc_alloc_size).wait();
cout << endl;
}
catch (Error& err)
{ cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; }
}
OpenCL设备源码
/*-----------------------------------------------------------------------------
* These kernels initialize user controlled heaps, they do not have to be
* separate kernels. The call to __heap_init_xxx can be rolled into an existing
* kernel and called before any __malloc_xxx calls are made.
*
* These heaps can be persistent across kernel boundaries as long as the
* underlying memory (i.e. the buffers pointed to by p) is not deallocated.
*----------------------------------------------------------------------------*/
__kernel void heap_init_ddr(__global void *p, size_t bytes)
{
    /* p:     start of the memory handed to the DDR heap.
     * bytes: size passed to __heap_init_ddr for that memory.
     * NOTE(review): the OpenCL C specification lists size_t among the types
     * not allowed for kernel arguments; this presumably relies on the target
     * compiler accepting it -- confirm before porting. */
    printf("DDR heap pointer is 0x%08x\n", p);

    /* Create the heap in the DDR region. */
    __heap_init_ddr(p, bytes);
}
__kernel void heap_init_msmc(__global void *p, size_t bytes)
{
    /* p:     start of the memory handed to the MSMC heap.
     * bytes: size passed to __heap_init_msmc for that memory.
     * NOTE(review): the OpenCL C specification lists size_t among the types
     * not allowed for kernel arguments; this presumably relies on the target
     * compiler accepting it -- confirm before porting. */
    printf("MSMC heap pointer is 0x%08x\n", p);

    /* Create the heap in the MSMC region. */
    __heap_init_msmc(p, bytes);
}
/*-----------------------------------------------------------------------------
* This kernel will allocate from the heaps and then free the memory.
*----------------------------------------------------------------------------*/
__kernel void alloc_and_free(int ddr_bytes, int msmc_bytes)
{
    /* A byte count of zero or less skips the corresponding heap entirely. */

    /* DDR heap: allocate a block, report the outcome, then release it. */
    if (ddr_bytes > 0)
    {
        char *ddr_block = __malloc_ddr(ddr_bytes);

        if (!ddr_block)
            printf("DDR alloc+free ERROR FAILED ALLOCATION\n");
        else
            printf("DDR alloc+free pointer is 0x%08x\n", ddr_block);

        /* Called unconditionally, i.e. also when the allocation failed. */
        __free_ddr(ddr_block);
    }

    /* MSMC heap: same sequence against the MSMC-backed heap. */
    if (msmc_bytes > 0)
    {
        char *msmc_block = __malloc_msmc(msmc_bytes);

        if (!msmc_block)
            printf("MSMC alloc+free ERROR FAILED ALLOCATION\n");
        else
            printf("MSMC alloc+free pointer is 0x%08x\n", msmc_block);

        /* Called unconditionally, i.e. also when the allocation failed. */
        __free_msmc(msmc_block);
    }
}
/*-----------------------------------------------------------------------------
* This kernel will allocate from the heaps and the memory is not freed. The
* active pointers p1 and p2 could be returned to the host application, via
* output arguments to the kernel. They could then subsequently be passed to
* other kernels. However the values should not be dereferenced on the host,
* because the DSP addresses are not valid linux system addresses.
*
* Additionally, if you do maintain a malloced block across kernel boundaries,
* depending on how the kernels are enqueued you may not know which core will
* subsequently be passed the pointer and access the memory, therefore you
* will need to manage cache coherency manually. This method of communicating
* across kernels is not recommended for this reason. Passing a buffer from
* the host to kernel 1 that populates the buffer and then passing the buffer
* to kernel 2 where the buffer is read is the preferred method for
* communicating across kernels, because it is portable and cache operations
* are managed automatically.
*----------------------------------------------------------------------------*/
__kernel void alloc_only(int ddr_bytes, int msmc_bytes)
{
    /* A byte count of zero or less skips the corresponding heap entirely.
     * Neither block is freed here; the pointers are only printed. */

    /* DDR heap: allocate a block and report the outcome. */
    if (ddr_bytes > 0)
    {
        char *ddr_block = __malloc_ddr(ddr_bytes);

        if (!ddr_block)
            printf("DDR alloc ERROR FAILED ALLOCATION\n");
        else
            printf("DDR alloc pointer is 0x%08x\n", ddr_block);
    }

    /* MSMC heap: allocate a block and report the outcome. */
    if (msmc_bytes > 0)
    {
        char *msmc_block = __malloc_msmc(msmc_bytes);

        if (!msmc_block)
            printf("MSMC alloc ERROR FAILED ALLOCATION\n");
        else
            printf("MSMC alloc pointer is 0x%08x\n", msmc_block);
    }
}