AMD-SDK的学习[3]--BasicDebug

一、BasicDebug

main:

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

#define SUCCESS 0
#define FAILURE 1
#define EXPECTED_FAILURE 2

#define  GlobalThreadSize 256
#define  GroupSize 64

using namespace std;

int main()
{
	//set up OpenCL...
	cl_uint platformNum;
	cl_int status;
	status=clGetPlatformIDs(0,NULL,&platformNum);
	if(status!=CL_SUCCESS){
		printf("cannot get platforms number.\n");
		return -1;
	}
	cl_platform_id* platforms;
	platforms=(cl_platform_id*)alloca(sizeof(cl_platform_id)*platformNum);
	status=clGetPlatformIDs(platformNum,platforms,NULL);
	if(status!=CL_SUCCESS){
		printf("cannot get platforms addresses.\n");
		return -1;
	}
	cl_platform_id platformInUse=platforms[0];
	cl_device_id device;
	status=clGetDeviceIDs(platformInUse,CL_DEVICE_TYPE_DEFAULT,1,&device,NULL);
	cl_context context=clCreateContext(NULL,1,&device,NULL,NULL,&status);
	cl_command_queue_properties prop=0; //CL_QUEUE_PROFILING_ENABLE;
	cl_command_queue_properties *propers;
	propers=∝
	cl_command_queue commandQueue=clCreateCommandQueueWithProperties(context,device,propers, &status);
	std::ifstream srcFile("/home/jumper/OpenCL_projects/AMD-Sample-BasicDebug/BasicDebug_Kernel.cl");
	std::string srcProg(std::istreambuf_iterator<char>(srcFile),(std::istreambuf_iterator<char>()));
	const char * src = srcProg.c_str();
	size_t srclength = srcProg.length();
	cl_program program=clCreateProgramWithSource(context,1,&src,&srclength,&status);

	char buildoption[128];
	sprintf(buildoption,"-g -D WGSIZE=%d",GroupSize);
	status=clBuildProgram(program,1,&device,buildoption,NULL,&status);
	if (status != CL_SUCCESS)
	 {
		 cout<<"error:Build BasicDebug_Kernel()..."<<endl;
		 return(EXIT_FAILURE);
	 }

	//set input data
	cl_uint inputSizeBytes = GlobalThreadSize *  sizeof(cl_uint);
	cl_float *input = (cl_float *) malloc(inputSizeBytes);
	for(int i=0;i< GlobalThreadSize;i++)
	{
		input[i] = (float)i;
	}
	//create input buffer
	cl_mem inputBuffer = clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,sizeof(cl_uint) * GlobalThreadSize,(void *)input,&status);
	if (status != CL_SUCCESS)
	{
		std::cout<<"Error: Creating input buffer failed!"<<std::endl;
		return FAILURE;
	}
	//create output buffer
	cl_mem outputBuffer = clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(cl_uint) * GlobalThreadSize,NULL, &status);
	if (status != CL_SUCCESS)
	{
		std::cout<<"Error: Creating output buffer failed!"<<std::endl;
		return FAILURE;
	}
	cl_kernel kernel1 = clCreateKernel(program, "printfKernel", &status);
	if (status != CL_SUCCESS)
	{
		std::cout<<"Error: Creating printfKernel failed!"<<std::endl;
		return FAILURE;
	}
	//set kernel args.
	status = clSetKernelArg(kernel1, 0, sizeof(cl_mem), (void *)&inputBuffer);
	//create debug kernel
	cl_kernel kernel2 = clCreateKernel(program, "debugKernel2", &status);
	if (status != CL_SUCCESS)
	{
		std::cout<<"Error: Creating debugKernel2 failed!"<<std::endl;
		return FAILURE;
	}
	//set kernel args.
	status = clSetKernelArg(kernel2, 0, sizeof(cl_mem), (void *)&inputBuffer);
	status = clSetKernelArg(kernel2, 1, sizeof(cl_mem), (void *)&outputBuffer);
	size_t global_threads[1];
	size_t local_threads[1];
	global_threads[0] = GlobalThreadSize;
	local_threads[0] = GroupSize;

	//execute the kernel
	status = clEnqueueNDRangeKernel(commandQueue, kernel1, 1, NULL, global_threads, local_threads, 0, NULL, NULL);
	if (status != CL_SUCCESS)
	{
		std::cout<<"Error: Enqueue kernel onto command queue failed!"<<std::endl;
		return FAILURE;
	}
	status = clFinish(commandQueue);

	status = clEnqueueNDRangeKernel(commandQueue, kernel2, 1, NULL, global_threads, local_threads, 0, NULL, NULL);
	if (status != CL_SUCCESS)
	{
		std::cout<<"Error: Enqueue kernel onto command queue failed!"<<std::endl;
		return FAILURE;
	}
	status = clFinish(commandQueue);


	// Clean the resources.
	status = clReleaseKernel(kernel1);//Release kernel.
	status = clReleaseKernel(kernel2);
	status = clReleaseMemObject(inputBuffer);//Release mem object.
	status = clReleaseMemObject(outputBuffer);
	status = clReleaseProgram(program);//Release program.
	status = clReleaseCommandQueue(commandQueue);//Release command queue.
	status = clReleaseContext(context);//Release context.
	status = clReleaseDevice(device);
	free(input);
	std::cout<<"Passed!\n";

	return 0;
}
cl文件:

// WGSIZE is normally supplied by the host through the build option
// "-D WGSIZE=<n>". The fallback below keeps the file compilable on its own.
#ifndef WGSIZE
#define WGSIZE 64
#endif

// Demonstrates printf debugging inside a kernel.
//
// inputbuffer: global float array; the first four elements are read and
//              printed as a float4 by the work-item whose global ID is
//              WGSIZE - 1.
//
// Every work-item stores its local ID into a __local array of WGSIZE
// elements, so the work-group size used at launch must not exceed WGSIZE.
// After the barrier, the work-item with local ID WGSIZE - 1 prints its group
// ID and its own slot of the local array. Finally every work-item prints its
// global ID.
__kernel void printfKernel(__global float *inputbuffer)
{
	const uint globalID = get_global_id(0);
	const uint groupID = get_group_id(0);
	const uint localID = get_local_id(0);
	__local int data[WGSIZE];
	// Index of the last slot; kept unsigned so the ID comparisons below do
	// not mix signed and unsigned operands.
	const uint idx = WGSIZE - 1;

	if(globalID == idx)
	{
		float4 f = (float4)(inputbuffer[0], inputbuffer[1], inputbuffer[2], inputbuffer[3]);
		// "v4hl" is the OpenCL printf form for a 4-component float vector.
		printf("Output vector data: f4 = %2.2v4hlf\n", f);
	}

	data[localID] = (int)localID;
	// Make every work-item's local-memory write visible before reading.
	barrier(CLK_LOCAL_MEM_FENCE);

	if(localID == idx)
	{
		// groupID is unsigned, so it is printed with %u.
		printf("\tThis is group %u\n",groupID);
		printf("\tOutput LDS data:  %d\n",data[idx]);
	}
	// globalID is unsigned, so it is printed with %u.
	printf("the global ID of this thread is : %u\n",globalID);
}


// Copies one element per work-item from inputbuffer to outputbuffer.
// The value passes through an unsigned integer on the way, so the float is
// converted to uint and back before being stored.
__kernel void debugKernel2(__global float *inputbuffer,__global float *outputbuffer)
{
	const uint gid = get_global_id(0);
	// float -> uint conversion, same as the implicit assignment conversion.
	const uint asInteger = (uint)inputbuffer[gid];
	outputbuffer[gid] = (float)asInteger;
}
补充:所有的 clReleaseEvent() 之前都漏了一句 clWaitForEvents(1, &event);(event 为对应的事件对象),需要补上!

这个例子比较简单,不需要多做解释。不过其中有一个新的用法:当 kernel 里需要某个常量时,我以前的做法是:1、直接写字面值常量(比较麻烦);2、在 cl 文件中用 #define XXX XX(相对简单)。这个例子给出了第三种方式:3、通过 clBuildProgram 的编译选项(-D 名称=值)把常量传进 kernel!

// Format the build options: "-g" plus a -D definition that sets WGSIZE to GroupSize.
char buildoption[128];
	sprintf(buildoption,"-g -D WGSIZE=%d",GroupSize);
	// Build the program for the single device, passing the options string to the compiler.
	status=clBuildProgram(program,1,&device,buildoption,NULL,&status);
这样将WGSIZE(GroupSize即64)传进cl文件里!






  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

元气少女缘结神

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值