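// Example: for every element of the res array, compute the integer average of its 3x3 neighbourhood (clipped at the array borders); ans holds the result.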
#include"CL/cl.h"
#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<iostream>
#include<vector>
#define SIZE_X 10
#define SIZE_Y 10
// OpenCL C source of the GetAverage kernel.
//
// The kernel is launched over a 2-D NDRange whose global size is the size of
// the grid. Each work-item (x, y) sums the in-bounds cells among itself and
// its eight neighbours, and writes the integer average to ans[x + y * x_size].
//
// dir lives in the __constant address space: OpenCL C 1.x only allows
// program-scope variables in that address space, so a plain `const int`
// table is rejected by 1.x compilers.
// Coordinates are kept as int so that adding a -1 offset yields a real
// negative value instead of relying on size_t wrap-around.
const char* KernelAverage =
"__constant int dir[9][2] = { { 1 , 0 } , { 0 , 1 } , { -1 , 0 } , { 0 , -1 } , { 1 , 1 } , { 1 , -1 } , { -1 , 1 } , { -1 , -1 } , { 0 , 0 } };\n"
"__kernel void GetAverage( __global const int *res , __global int *ans ) \n"
"{\n"
"    const int x = (int)get_global_id( 0 );\n"
"    const int y = (int)get_global_id( 1 );\n"
"    const int x_size = (int)get_global_size( 0 );\n"
"    const int y_size = (int)get_global_size( 1 );\n"
"    int cnt = 0 , sum = 0;\n"
"    for( int i = 0 ; i < 9 ; ++i )\n"
"    {\n"
"        const int tx = x + dir[i][0];\n"
"        const int ty = y + dir[i][1];\n"
"        if( tx < 0 || ty < 0 || tx >= x_size || ty >= y_size )\n"
"        { continue; }\n"
"        sum += res[tx + ty * x_size];\n"
"        cnt++;\n"
"    }\n"
"    ans[x + y * x_size] = ( cnt != 0 ) ? ( sum / cnt ) : 0;\n"
"}\n";
int main() {
cl_int status = 0;
cl_uint numPlatforms;
/// Get the number of the Platforms //
status = clGetPlatformIDs( 0 , NULL , &numPlatforms );
if( status != CL_SUCCESS ) {
printf( "Error:Getting Platforms.(clGetPlatformsIDs)\n" );
return EXIT_FAILURE;
}
//
if( numPlatforms <= 0 )
{ printf("The number of the platform is ZERO\n"); return 0; }
cl_platform_id *platforms = new cl_platform_id[numPlatforms];
/// Get the lists of the platforms
status = clGetPlatformIDs( numPlatforms , platforms , NULL );
if ( status != CL_SUCCESS)
{
printf( "Error:Getting Platform Ids.(clGetPlatformsIDs)\n" );
return -1;
}
Get the AMD platform //
cl_platform_id platform;
for( unsigned int i = 0 ; i < numPlatforms ; ++i ) {
char pff[100];
status = clGetPlatformInfo( platforms[i] , CL_PLATFORM_VENDOR , sizeof( pff ) , pff , NULL );
platform = platforms[i];
if( !strcmp( pff , "Avanced Micro Devices , Inc." ) )
{ break; }
}
delete []platforms;
/// Get the platform context
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM , ( cl_context_properties )platform , 0 };
cl_context_properties *cprops = ( NULL == platform ) ? NULL : cps;
cl_context context = clCreateContextFromType( cprops , CL_DEVICE_TYPE_CPU , NULL , NULL , &status );
if( status != CL_SUCCESS )
{
printf( "Error: Creating Context.( clCreateContextFromType )\n");
return EXIT_FAILURE;
}
size_t deviceListSize;
status = clGetContextInfo( context , CL_CONTEXT_DEVICES , 0 , NULL , &deviceListSize );
if( status != CL_SUCCESS )
{
printf( "Error: Getting Context Info device list size. clGetContextInfo\n");
return EXIT_FAILURE;
}
cl_device_id *devices = ( cl_device_id * )malloc( deviceListSize );
if( devices == NULL )
{
printf( "Error: No devices found.\n");
return EXIT_FAILURE;
}
status = clGetContextInfo( context , CL_CONTEXT_DEVICES , deviceListSize , devices , NULL );
if( status != CL_SUCCESS )
{
printf( "Error: Getting Context Info device list , clGetContextInfo\n");
return EXIT_FAILURE;
}
Get the source code /
size_t sourceSize[] = { strlen( KernelAverage ) };
//clCreateProgramWithBinary( context , 1 ,devices , sourceSize , , ,);
cl_program program = clCreateProgramWithSource( context , 1 , &KernelAverage , sourceSize , &status );
if( status != CL_SUCCESS ) {
printf( "Error: Loading Binary into cl_program( clCreateProgramWithBinary )\n");
return EXIT_FAILURE;
}
/ Build Code /
status = clBuildProgram( program , 1 , devices , NULL , NULL , NULL );
if( status != CL_SUCCESS ) {
printf( "Error: Building Program( clBuildProgram )\n %d" , status );
if( status == CL_BUILD_PROGRAM_FAILURE ) {
cl_int logStatus;
char * buildLog = NULL;
size_t buildLogSize = 0;
logStatus = clGetProgramBuildInfo( program , devices[0] , CL_PROGRAM_BUILD_LOG , buildLogSize , buildLog , &buildLogSize );
buildLog = ( char* )malloc( buildLogSize );
memset( buildLog , 0 , buildLogSize );
logStatus = clGetProgramBuildInfo( program , devices[0] , CL_PROGRAM_BUILD_LOG , buildLogSize , buildLog , NULL );
std::cout << " \n\t\t\tBUILD LOG\n";
std::cout << buildLog << std::endl;
free( buildLog );
}
return EXIT_FAILURE;
}
Create Kernel ///
cl_kernel kernel = clCreateKernel( program , "GetAverage" , &status );
if( status != CL_SUCCESS ){
printf( "Error: Creating Kernel from program.(clCreateKernel)\n");
return EXIT_FAILURE;
}
Create Command Queue /
cl_command_queue commandQueue = clCreateCommandQueue( context , devices[0] , 0 , &status );
if( status != CL_SUCCESS ) {
printf( "Creating Command Queue.(clCreateCommandQueue)\n");
return EXIT_FAILURE;
}
int ans[SIZE_Y][SIZE_X] = { 0 };
int res[SIZE_Y][SIZE_X] = { 1 , 3 , 5 , 4 , 2 , 1 , 1 , 0 , 5 , 2 ,
6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 ,
1 , 3 , 5 , 4 , 2 , 1 , 1 , 0 , 5 , 2 ,
6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 ,
9 , 3 , 5 , 4 , 2 , 1 , 1 , 0 , 5 , 2 ,
6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 ,
7 , 3 , 5 , 9 , 9 , 9 , 1 , 0 , 5 , 2 ,
6 , 4 , 2 , 9 , 9 , 9 , 7 , 5 , 3 , 3 ,
1 , 3 , 5 , 9 , 9 , 9 , 1 , 0 , 5 , 2 ,
6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 };
//memset( ans , 0 , sizeof( int ) * SIZE_X * SIZE_Y );
/ Create Argument /
cl_mem resBuffer = clCreateBuffer( context , CL_MEM_USE_HOST_PTR , SIZE_X * SIZE_Y * sizeof( int ) ,
res , &status );
cl_mem ansBuffer = clCreateBuffer( context , CL_MEM_USE_HOST_PTR , SIZE_X * SIZE_Y * sizeof( int ) ,
ans , &status );
if( status != CL_SUCCESS ) {
printf( "Error: clCreateBuffer.\n");
return EXIT_FAILURE;
}
status = clSetKernelArg( kernel , 0 , sizeof( cl_mem ) ,( void* )&resBuffer );
status = clSetKernelArg( kernel , 1 , sizeof( cl_mem ) ,( void* )&ansBuffer );
if( status != CL_SUCCESS ) {
printf( "Error: Setting kernel argument.\n");
return EXIT_FAILURE;
}
Implement kernel
size_t globalThread[] = { 10 , 10 };
size_t localThread[] = { 2 , 2 };
status = clEnqueueNDRangeKernel( commandQueue , kernel , 2 , NULL , globalThread , localThread , 0 , NULL , NULL );
if( status != CL_SUCCESS ) {
printf( "Error: Enqueueing kernel\n");
return EXIT_FAILURE;
}
/ Wait for kernel to finish
status = clFinish( commandQueue );
if( status != CL_SUCCESS ) {
printf( "Error: Finish command queue\n");
return EXIT_FAILURE;
}
//
printf("out:\n");
for( int i = 0 ; i < 10 ; ++i ) {
for( int j = 0 ; j < 10 ; ++j ) {
printf( "%3d " , ans[i][j] );
}
printf("\n");
}
return 0;
}
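// Note: in OpenCL, use __global memory as sparingly as possible and prefer __local and __private memory wherever you can; frequent __global accesses can hurt performance significantly.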