/*kernel.cl*/
/*
 * vecadd: element-wise addition of two buffers, C[i] = A[i] + B[i].
 *
 * A, B : input buffers in global memory.
 * C    : output buffer in global memory.
 *
 * Each work-item processes the single element selected by its global id in
 * dimension 0. There is no bounds check, so the host must not launch more
 * work-items than the buffers have elements.
 */
__kernel void vecadd(
    __global uint *A,
    __global uint *B,
    __global uint *C
)
{
    /* get_global_id() returns size_t; keeping that type avoids narrowing the
       index to a signed int. */
    const size_t index = get_global_id(0);
    C[index] = A[index] + B[index];
}
/*main.cpp*/
#include<stdio.h>
#include<string.h>
#include <cstdlib>
#include <fstream>
#include<iostream>
#include <iterator>
#include <string>
#include <vector>
#include<CL/cl.h>
#define elements 2048
using namespace std;
/**
 * Terminates the program if an OpenCL call did not succeed.
 *
 * @param err   Status code returned by the OpenCL call.
 * @param name  Label printed with the error, identifying the failing call.
 *
 * When err equals CL_SUCCESS the function returns without doing anything.
 * Otherwise it writes "ERROR:<name>(<err>)" to standard error and exits
 * with EXIT_FAILURE.
 */
inline void checkErr(cl_int err,const char *name)
{
    // Nothing to report when the call succeeded.
    if (err == CL_SUCCESS) {
        return;
    }

    std::cerr << "ERROR:" << name << "(" << err << ")" << std::endl;
    exit(EXIT_FAILURE);
}
int main()
{
cl_uint numPlatforms;
cl_platform_id *platforms;
cl_int status;
cl_uint numDevices;
cl_device_id *devices;
cl_context context;
cl_command_queue cmdQueue;
cl_mem bufferA;
cl_mem bufferB;
cl_mem bufferC;
cl_program program;
cl_kernel kernel;
int *A,*B,*C;
size_t datasize=elements*sizeof(int);
A=(int*)malloc(datasize);
B=(int*)malloc(datasize);
C=(int*)malloc(datasize);
for(int i=0;i<elements;i++){
A[i]=2;
B[i]=3;
}
//
status=clGetPlatformIDs(0,NULL,&numPlatforms);
platforms=(cl_platform_id*)malloc(sizeof(cl_platform_id)*numPlatforms);
status=clGetPlatformIDs(numPlatforms,platforms,NULL);
checkErr(status,"clGetPlatformIDs");
status=clGetDeviceIDs(platforms[0],CL_DEVICE_TYPE_ALL,0,NULL,&numDevices);
checkErr(status,"clGetDeviceIDs");
devices=(cl_device_id*)malloc(sizeof(cl_device_id)*numDevices);
status=clGetDeviceIDs(platforms[0],CL_DEVICE_TYPE_ALL,numDevices,devices,NULL);
checkErr(status,"clGetDeviceIDs");
#if 1
cout<<"Number of Platforms:"<<numPlatforms<<endl;
cout<<"Number of Devices:"<<numDevices<<endl;
int maxComputeUnits;
size_t size;
clGetDeviceInfo(devices[0],CL_DEVICE_MAX_COMPUTE_UNITS,sizeof(cl_uint),&maxComputeUnits,&size);
cout<<"设备计算单元个数为:"<<maxComputeUnits<<endl;
#endif
context=clCreateContext(NULL,numDevices,devices,NULL,NULL,&status);
checkErr(status,"clCreateContext");
cmdQueue=clCreateCommandQueue(context,devices[0],0,&status);
checkErr(status,"clCreateCommandQueue");
//创建设备缓冲
bufferA=clCreateBuffer(context,CL_MEM_READ_ONLY,datasize,NULL,&status);
checkErr(status,"bufferA");
bufferB=clCreateBuffer(context,CL_MEM_READ_ONLY,datasize,NULL,&status);
checkErr(status,"bufferB");
bufferC=clCreateBuffer(context,CL_MEM_WRITE_ONLY,datasize,NULL,&status);
checkErr(status,"bufferC");
//把主机数据读取到设备缓冲
status=clEnqueueReadBuffer(cmdQueue,bufferA,CL_FALSE,0,datasize,A,0,NULL,NULL);
checkErr(status,"clEnqueueReadBuffer");
status=clEnqueueReadBuffer(cmdQueue,bufferB,CL_FALSE,0,datasize,B,0,NULL,NULL);
checkErr(status,"clEnqueueReadBuffer");
//创建并编译程序
std::ifstream srcFile("kernel.cl");
std::string srcProg(istreambuf_iterator<char>(srcFile),(istreambuf_iterator<char>()));
const char*src= srcProg.c_str();
size_t length=srcProg.length();
program=clCreateProgramWithSource(context,1,&src,&length,&status);
checkErr(status,"clCreateProgramWithSource");
status=clBuildProgram(program,numDevices,devices,NULL,NULL,NULL);
checkErr(status,"clBuildProgram");
//创建kernel
kernel=clCreateKernel(program,"vecadd",&status);
checkErr(status,"clCreateKernel");
//设置kernel参数
status=clSetKernelArg(kernel,0,sizeof(cl_mem),&bufferA);
checkErr(status,"clSetKernelArgA");
status=clSetKernelArg(kernel,1,sizeof(cl_mem),&bufferB);
checkErr(status,"clSetKernelArgB");
status=clSetKernelArg(kernel,2,sizeof(cl_mem),&bufferC);
checkErr(status,"clSetKernelArgC");
//配置执行参数
const size_t globalWorkeSize[1]={elements};
status=clEnqueueNDRangeKernel(cmdQueue,kernel,1,NULL,globalWorkeSize,NULL,0,NULL,NULL);
checkErr(status,"clEnqueueNDRangeKernel");
//读取计算结果到主机端
status=clEnqueueReadBuffer(cmdQueue,bufferC,CL_TRUE,0,datasize,C,0,NULL,NULL);
checkErr(status,"clEnqueueReadBuffer");
bool result=true;
for(int i=0;i<elements;i++){
if(C[i]!=5){
result=false;
break;
}
}
if(result==false){
cout<<"passed!\n"<<endl;
}
else{
cout<<"error!\n"<<endl;
}
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(cmdQueue);
clReleaseMemObject(bufferA);
clReleaseMemObject(bufferB);
clReleaseMemObject(bufferC);
clReleaseContext(context);
free(platforms);
free(devices);
free(A);
free(B);
free(C);
cout<<"Over!\n"<<endl;
getchar();
return 0;
}