其中,opencl code如下:
//pipeline
__kernel void pipe_producer(__global float *src, write_only pipe float out_pipe)
{
int gid = get_global_id(0);
reserve_id_t res_id;
//every work-item read one package data from out_pipe
res_id = reserve_write_pipe(out_pipe, 1);
float src_pipe = src[gid];
if(is_valid_reserve_id(res_id))
{
if(write_pipe(out_pipe, res_id, 0, &src_pipe) != 0)
{
return ;
}
commit_write_pipe(out_pipe,res_id);
}
}
__kernel void pipe_consumer(__global float *dst, read_only pipe float in_pipe)
{
int gid = get_global_id(0);
reserve_id_t res_id;
res_id = reserve_read_pipe(in_pipe, 1);
//every work-item write one package data from in_pipe
float dst_pipe;
if(is_valid_reserve_id(res_id))
{
if(read_pipe(in_pipe, res_id, 0, &dst_pipe) != 0)
{
return ;
}
commit_read_pipe(in_pipe,res_id);
}
dst[gid] = dst_pipe;
}
其中,服务器端的代码如下:
//follow is pipeline example code
{
int numPackets = 256;
int packetSize = sizeof(float);
int sizeBytes = numPackets * packetSize;
cl_kernel kernel_pipe_consumer = clCreateKernel(program, "pipe_consumer", &err);
opencl_check(err);
cl_kernel kernel_pipe_producer = clCreateKernel(program, "pipe_producer", &err);
opencl_check(err);
//fine grain svm
float *src = (float*)clSVMAlloc(context, CL_MEM_READ_WRITE/*|CL_MEM_SVM_FINE_GRAIN_BUFFER*/, sizeBytes, 0);
float *dst = (float*)clSVMAlloc(context, CL_MEM_READ_WRITE/*|CL_MEM_SVM_FINE_GRAIN_BUFFER*/, sizeBytes, 0);
if( src == NULL || dst == NULL)
{
printf("allocate svm error , src %p, dst %p\n", src, dst);
}
//create pipeline, package data type is float, package data number is numPackets
cl_mem pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, packetSize, numPackets, NULL, &err);
opencl_check(err);
//initialize the src and dst memory
for(int i = 0; i < numPackets; i++)
{
src[i] = i;
dst[i] = 0.f;
}
size_t globalworksize = numPackets;
size_t localworksize = 128;
//create event
cl_event producer_event = clCreateUserEvent(context, &err);
opencl_check(err);
//set producer kernel arguments
err = clSetKernelArgSVMPointer(kernel_pipe_producer, 0, src);
opencl_check(err);
err = clSetKernelArg(kernel_pipe_producer, 1, sizeof(cl_mem), &pipe);
opencl_check(err);
//execute the producer kernel
err = clEnqueueNDRangeKernel(queue, kernel_pipe_producer, 1, NULL, &globalworksize, &localworksize, 0, NULL, &producer_event);
opencl_check(err);
//set consumer kernel arguments
err = clSetKernelArgSVMPointer(kernel_pipe_consumer, 0, dst);
opencl_check(err);
err = clSetKernelArg(kernel_pipe_consumer, 1, sizeof(cl_mem), &pipe);
opencl_check(err);
//execute the consumer kernel
err = clEnqueueNDRangeKernel(queue, kernel_pipe_consumer, 1, NULL, &globalworksize, &localworksize, 1, &producer_event, NULL);
opencl_check(err);
err = clFinish(queue);
opencl_check(err);
//print dst memory
for(int i = 0; i < numPackets; i++)
{
printf("%f \n", dst[i]);
}
printf("\n");
}