1. 什么是DeviceFission?
DeviceFission就是设备拆分,目前仅支持CPU的拆分。《OpenCL异构计算》第11章对此有部分介绍。书中举的例子是AMD的6核Istanbul x86 CPU:该CPU有6个核,默认情况下被当作一个设备看待,但是通过设备拆分可以将6个核各自看作一个OpenCL设备。每一个设备都可以有一个或多个命令队列,这些命令队列是异步的,并且各自在自己的线程中运行。这样就可以利用设备拆分来创建一个简单而强大的、基于任务并行的多线程应用程序。
2.例子的功能介绍
该例子将CPU设备拆分成两个子设备,拆分使用的函数为clCreateSubDevices,拆分方式为CL_DEVICE_PARTITION_BY_COUNTS。在该例子中只创建了一个缓冲区,由其中的一个子设备来写入初值,之后两个子设备同时进行计算:一个子设备调用Add内核,另一个子设备调用Sub内核。
3.内核实现函数
首先看下内核实现:
/*
 * Add kernel: each work-item reads the element of `input` at its global ID
 * (dimension 0), adds one, and stores the result at the same index in `output`.
 */
__kernel void Add(__global int* input, __global int* output)
{
    const size_t gid = get_global_id(0);
    const int value = input[gid];
    output[gid] = value + 1;
}
/*
 * Sub kernel: each work-item reads the element of `input` at its global ID
 * (dimension 0), subtracts one, and stores the result at the same index in
 * `output`.
 */
__kernel void Sub(__global int* input, __global int* output)
{
    const size_t gid = get_global_id(0);
    const int value = input[gid];
    output[gid] = value - 1;
}
这两个内核都很简单,这里不再解释。
4.主机关键部分解析
int
DeviceFission::setupDeviceFission()
{
// Make sure length is multiple of group size * numSubDevices
unsigned int mulFactor = (unsigned int)groupSize * numSubDevices;
length = (length < mulFactor) ? mulFactor : length;
length = (length / mulFactor) * mulFactor;
// Calculate half length
half_length = length >> 1;
// Get allocate memory for input buffer
input = (cl_int*)malloc(half_length * sizeof(cl_int));
CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)");
// Random initialisation of input
fillRandom<cl_int>(input, half_length, 1, 1, 8);
// Unless sampleArgs->quiet mode has been enabled, print the INPUT array
if(!sampleArgs->quiet)
{
printArray<cl_int>("Input:", input, half_length, 1);
}
// Get allocate memory for subOutput buffer
subOutput = (cl_int*)malloc(length * sizeof(cl_int));
CHECK_ALLOCATION(subOutput, "Failed to allocate host memory. (subOutput)");
return SDK_SUCCESS;
}
以上是输入数据的初始化部分:调整length的大小,并为input和subOutput分配主机内存。
int
DeviceFission::setupCLPlatform()
{
cl_int status = CL_SUCCESS;
/*
* Have a look at the available platforms and pick either
* the AMD one if available or a reasonable default.
*/
cl_platform_id platform = NULL;
int retValue = getPlatform(platform, sampleArgs->platformId,
sampleArgs->isPlatformEnabled());
CHECK_ERROR(retValue, SDK_SUCCESS, "getPlatform(rootplatform) failed");
// Display available devices.
retValue = displayDevices(platform, CL_DEVICE_TYPE_ALL);
CHECK_ERROR(retValue, SDK_SUCCESS, "displayDevices(rootplatform) failed");
/*
* If we could find our platform, use it. Otherwise use just available platform.
*/
cl_context_properties cps[3] =
{
CL_CONTEXT_PLATFORM,
(cl_context_properties)platform,
0
};
rContext = clCreateContextFromType(platform ? cps : NULL,
CL_DEVICE_TYPE_ALL,
NULL,
NULL,