OpenCL的使用

例子:对 res 数组中的每个元素,求它与周围相邻元素(3×3 邻域,越界的位置忽略)的平均值(整数除法),结果保存在 ans 数组的对应位置。

#include<cstdio>
#include<cstdlib>
#include<cstring>
#include<iostream>
#include"CL/cl.h"
#define SIZE_X 10
#define SIZE_Y 10
/*
 * OpenCL C source of the GetAverage kernel; the host compiles it at run time.
 *
 * The kernel is written for a 2-D NDRange. Work-item (x, y) sums res over the
 * nine offsets listed in dir (the cell itself plus its eight neighbours),
 * skips any position that falls outside the NDRange, and stores the truncated
 * integer mean in ans[x + y * x_size].
 *
 * dir is declared __constant because OpenCL C 1.x requires program-scope
 * variables to live in the constant address space. All index arithmetic is
 * done in int so that adding a negative offset cannot wrap as size_t.
 */
const char* KernelAverage = 
	"__constant int dir[9][2] = { 1 , 0 , 0 , 1 , -1 , 0 , 0 , -1 , 1 , 1 , 1 , -1 , -1 , 1 , -1 , -1 , 0 , 0 };\n"
	"__kernel void GetAverage( __global const int *res , __global int *ans )\n"
	"{\n"
	"int x = (int)get_global_id( 0 );\n"
	"int y = (int)get_global_id( 1 );\n"
	"int x_size = (int)get_global_size( 0 );\n"
	"int y_size = (int)get_global_size( 1 );\n"
	"int tx , ty;\n"
	"int cnt = 0 , sum = 0;\n"
	"for( int i = 0 ; i < 9 ; ++i )\n"
	"{\n"
	"  tx = x + dir[i][0];\n"
	"  ty = y + dir[i][1];\n"
	"  if( tx < 0 || ty < 0 || tx >= x_size || ty >= y_size )\n"
	"  { continue; }\n"
	"  sum += res[tx + ty * x_size];\n"
	"  cnt++;\n"
	"}\n"
	"if( cnt != 0 )\n"
	"{ sum /= cnt; ans[x + y * x_size] = sum; }\n"
	"else\n"
	"{ ans[x + y * x_size] = 0; }\n"
	"}\n";

int main() {
	cl_int status = 0;
	cl_uint numPlatforms;
	/// Get the number of the Platforms //
	status = clGetPlatformIDs( 0 , NULL , &numPlatforms );
	if( status != CL_SUCCESS ) { 
		printf( "Error:Getting Platforms.(clGetPlatformsIDs)\n" );
		return EXIT_FAILURE;
	}
	//

	if( numPlatforms <= 0 )
	{ printf("The number of the platform is ZERO\n"); return 0; }


	cl_platform_id *platforms = new cl_platform_id[numPlatforms];
	/// Get the lists of the platforms 
	status = clGetPlatformIDs( numPlatforms , platforms , NULL );
	if ( status != CL_SUCCESS)
	{
		printf( "Error:Getting Platform Ids.(clGetPlatformsIDs)\n" );
		return -1;
	}
	 Get the AMD platform //
	cl_platform_id platform;
	for( unsigned int i = 0 ; i < numPlatforms ; ++i ) {
		char pff[100];
		status = clGetPlatformInfo( platforms[i] , CL_PLATFORM_VENDOR , sizeof( pff ) , pff , NULL );
		platform = platforms[i];
		if( !strcmp( pff , "Avanced Micro Devices , Inc." ) )
		{ break; }
	}
	delete []platforms;
	/// Get the platform  context 
	cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM , ( cl_context_properties )platform , 0 };
	cl_context_properties *cprops = ( NULL == platform ) ? NULL : cps;
	cl_context context = clCreateContextFromType( cprops , CL_DEVICE_TYPE_CPU , NULL , NULL , &status );
	if( status != CL_SUCCESS )
	{
		printf( "Error: Creating Context.( clCreateContextFromType )\n");
		return EXIT_FAILURE;
	}
	size_t deviceListSize;
	status = clGetContextInfo( context , CL_CONTEXT_DEVICES , 0 , NULL , &deviceListSize );
	if( status != CL_SUCCESS )
	{
		printf( "Error: Getting Context Info device list size. clGetContextInfo\n");
		return EXIT_FAILURE;
	}
	cl_device_id *devices = ( cl_device_id * )malloc( deviceListSize );
	if( devices == NULL )
	{
		printf( "Error: No devices found.\n");
		return EXIT_FAILURE;
	}
	status = clGetContextInfo( context , CL_CONTEXT_DEVICES , deviceListSize , devices , NULL );
	if( status != CL_SUCCESS )
	{
		printf( "Error: Getting Context Info device list , clGetContextInfo\n");
		return EXIT_FAILURE;
	}
	 Get the source code /
	size_t sourceSize[] = { strlen( KernelAverage ) };
	//clCreateProgramWithBinary( context , 1 ,devices , sourceSize , , ,);
	cl_program program = clCreateProgramWithSource( context , 1 , &KernelAverage , sourceSize , &status );
	if( status != CL_SUCCESS ) {
		printf( "Error: Loading Binary into cl_program( clCreateProgramWithBinary )\n");
		return EXIT_FAILURE;
	}
	/ Build Code /
	status = clBuildProgram( program , 1 , devices , NULL , NULL , NULL );
	if( status != CL_SUCCESS ) {
		printf( "Error: Building Program( clBuildProgram )\n  %d" , status );
		if( status == CL_BUILD_PROGRAM_FAILURE ) {
			cl_int logStatus;
			char * buildLog = NULL;
			size_t buildLogSize = 0;
			logStatus = clGetProgramBuildInfo( program , devices[0] , CL_PROGRAM_BUILD_LOG , buildLogSize , buildLog , &buildLogSize );

			buildLog = ( char* )malloc( buildLogSize );
			memset( buildLog , 0 , buildLogSize );
			logStatus = clGetProgramBuildInfo( program , devices[0] , CL_PROGRAM_BUILD_LOG , buildLogSize , buildLog , NULL );

			std::cout << " \n\t\t\tBUILD LOG\n";
			std::cout << buildLog << std::endl;
			free( buildLog );
		}
		return EXIT_FAILURE;
	}
	 Create Kernel ///
	cl_kernel kernel = clCreateKernel( program , "GetAverage" , &status );
	if( status != CL_SUCCESS ){
		printf( "Error: Creating Kernel from program.(clCreateKernel)\n");
		return EXIT_FAILURE;
	}
	 Create Command Queue /
	cl_command_queue commandQueue = clCreateCommandQueue( context , devices[0] , 0 , &status );
	if( status != CL_SUCCESS ) {
		printf( "Creating Command Queue.(clCreateCommandQueue)\n");
		return EXIT_FAILURE;
	}

	int ans[SIZE_Y][SIZE_X] = { 0 };
	int res[SIZE_Y][SIZE_X] = { 1 , 3 , 5 , 4 , 2 , 1 , 1 , 0 , 5 , 2 ,
				  6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 ,
				  1 , 3 , 5 , 4 , 2 , 1 , 1 , 0 , 5 , 2 ,
				  6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 ,
				  9 , 3 , 5 , 4 , 2 , 1 , 1 , 0 , 5 , 2 ,
				  6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 ,
				  7 , 3 , 5 , 9 , 9 , 9 , 1 , 0 , 5 , 2 ,
				  6 , 4 , 2 , 9 , 9 , 9 , 7 , 5 , 3 , 3 ,
				  1 , 3 , 5 , 9 , 9 , 9 , 1 , 0 , 5 , 2 ,
				  6 , 4 , 2 , 1 , 3 , 9 , 7 , 5 , 3 , 3 };
	//memset( ans , 0 , sizeof( int ) * SIZE_X * SIZE_Y );
	
	/ Create Argument /
	cl_mem resBuffer = clCreateBuffer( context , CL_MEM_USE_HOST_PTR , SIZE_X * SIZE_Y * sizeof( int ) ,
		res , &status );
	cl_mem ansBuffer = clCreateBuffer( context , CL_MEM_USE_HOST_PTR , SIZE_X * SIZE_Y * sizeof( int ) ,
		ans , &status );
	if( status != CL_SUCCESS ) {
		printf( "Error: clCreateBuffer.\n");
		return EXIT_FAILURE;
	}
	status = clSetKernelArg( kernel , 0 , sizeof( cl_mem ) ,( void* )&resBuffer );
	status = clSetKernelArg( kernel , 1 , sizeof( cl_mem ) ,( void* )&ansBuffer );
	if( status != CL_SUCCESS ) {
		printf( "Error: Setting kernel argument.\n");
		return EXIT_FAILURE;
	}

	 Implement kernel 
	size_t globalThread[] = { 10 , 10 };
	size_t localThread[] = { 2 , 2 };
	status = clEnqueueNDRangeKernel( commandQueue , kernel , 2 , NULL , globalThread , localThread , 0 , NULL , NULL );
	if( status != CL_SUCCESS ) {
		printf( "Error: Enqueueing kernel\n");
		return EXIT_FAILURE;
	}
	/ Wait for kernel to finish 
	status = clFinish( commandQueue );
	if( status != CL_SUCCESS ) {
		printf( "Error: Finish command queue\n");
		return EXIT_FAILURE;
	}
	//
	printf("out:\n");
	for( int i = 0 ; i < 10 ; ++i ) {
		for( int j = 0 ; j < 10 ; ++j ) {
			printf( "%3d " , ans[i][j] );
		}
        printf("\n");
	}
	return 0;
}


注意:在 OpenCL 中,应尽可能少用 global 类型的数据,多用 local 和 private;频繁访问 global 内存会严重影响性能。



  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是一个简单的 OpenCL 使用示例,它执行向量加法:

```c
#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>

#define LENGTH 1024

const char *kernelSource =
    "__kernel void vectorAdd(__global float *a, __global float *b, __global float *c) {\n"
    " int i = get_global_id(0);\n"
    " c[i] = a[i] + b[i];\n"
    "}\n";

int main() {
    cl_platform_id platform;
    cl_device_id device;
    cl_context context;
    cl_command_queue queue;
    cl_program program;
    cl_kernel kernel;
    cl_mem a, b, c;
    cl_int err;
    int i;
    float *A, *B, *C;

    // 初始化 A, B, C 数组
    A = (float*) malloc(sizeof(float) * LENGTH);
    B = (float*) malloc(sizeof(float) * LENGTH);
    C = (float*) malloc(sizeof(float) * LENGTH);
    for (i = 0; i < LENGTH; i++) {
        A[i] = (float) i;
        B[i] = (float) (LENGTH - i);
        C[i] = 0.0f;
    }

    // 获取平台和设备
    err = clGetPlatformIDs(1, &platform, NULL);
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

    // 创建 OpenCL 上下文和命令队列
    context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    queue = clCreateCommandQueue(context, device, 0, &err);

    // 创建和编译内核程序
    program = clCreateProgramWithSource(context, 1, &kernelSource, NULL, &err);
    err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);

    // 创建内核
    kernel = clCreateKernel(program, "vectorAdd", &err);

    // 创建和设置缓冲区
    a = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * LENGTH, NULL, &err);
    b = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * LENGTH, NULL, &err);
    c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH, NULL, &err);
    err = clEnqueueWriteBuffer(queue, a, CL_TRUE, 0, sizeof(float) * LENGTH, A, 0, NULL, NULL);
    err = clEnqueueWriteBuffer(queue, b, CL_TRUE, 0, sizeof(float) * LENGTH, B, 0, NULL, NULL);

    // 设定内核参数
    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a);
    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b);
    err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c);

    // 执行内核
    size_t globalSize = LENGTH;
    size_t localSize = 64;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize, 0, NULL, NULL);

    // 读取结果
    err = clEnqueueReadBuffer(queue, c, CL_TRUE, 0, sizeof(float) * LENGTH, C, 0, NULL, NULL);

    // 打印结果
    for (i = 0; i < LENGTH; i++) {
        printf("%f + %f = %f\n", A[i], B[i], C[i]);
    }

    // 释放内存和资源
    free(A);
    free(B);
    free(C);
    clReleaseMemObject(a);
    clReleaseMemObject(b);
    clReleaseMemObject(c);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    return 0;
}
```

这个程序的作用是将两个向量相加,并打印结果。程序首先初始化了三个数组 A, B, C,然后获取 OpenCL 平台和设备,创建上下文和命令队列,创建和编译内核程序,创建内核,创建和设置缓冲区,设定内核参数,执行内核,读取结果,打印结果,最后释放内存和资源。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值