/* Adding two matrices with OpenCL (original title: 两个矩阵相加) */

#include<CL/cl.h>
#include<stdlib.h>
#include<stdio.h>

/*
 * OpenCL C source of the "add" kernel. Each work-item takes its global id in
 * dimension 0 as index i and stores A[i] + B[i] into C[i].
 * The adjacent string literals below are concatenated by the compiler into a
 * single string with no newline characters in it.
 */
const char *KernelSource =
    "__kernel void add("
    "__global int *A,"
    "__global int *B,"
    "__global int *C)"
    "{"
    "    int i=get_global_id(0);"
    "    C[i]=A[i]+B[i]; "
    "}";

/*
 * Error-check helper for OpenCL calls.
 *
 * err          status code returned (or written) by an OpenCL call
 * information  short name of the step that produced err; used in the message
 *
 * Returns normally when err is CL_SUCCESS. Otherwise prints the failing step
 * and the numeric status code to stderr and terminates the process with a
 * failure exit status, so this function does not return on error.
 */
void CheckErr(cl_int err, const char *information)
{
    if (err == CL_SUCCESS) {
        return;
    }

    /* Include the raw status code: the step name alone does not tell the
       reader which OpenCL error occurred. */
    fprintf(stderr, "Error of %s (OpenCL status %d)\n", information, (int)err);

#ifdef _WIN32
    /* "pause" is a Windows shell command; it keeps the console window open
       so the message can be read. It is skipped on other platforms. */
    system("pause");
#endif

    /* Exit with a failure status so scripts and shells can detect the error
       (the previous exit(0) reported success). */
    exit(EXIT_FAILURE);
}

int main()
{
const int BUFSIZE=20;
       size_t DataSize=sizeof(int)*BUFSIZE;
int MatrixA[BUFSIZE],  
             MatrixB[BUFSIZE], 
             MatrixC[BUFSIZE];
int i;
//为矩阵A随机赋值,并将其输出
for(i=0;i<BUFSIZE;i++)
{
MatrixA[i]=i;
printf("%d ",MatrixA[i]);
}
printf("\n");
//为矩阵B随机赋值,并将其输出
for(i=0;i<BUFSIZE;i++)
{
MatrixB[i]=20-i;
printf("%d ",MatrixB[i]);
}
printf("\n");
printf("CPU Result:\n");
//A与B实现相加存于C中并输出
for(i=0;i<BUFSIZE;i++)
{
MatrixC[i]=MatrixA[i]+MatrixB[i];
printf("%d ",MatrixC[i]);
}
printf("\n");
cl_int err;
//第一步构建OpenCL平台:
cl_platform_id platform;
err=clGetPlatformIDs(1,&platform,NULL);
CheckErr(err,"Platform");
//第二步?查询设备:
cl_device_id device;
err=clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);
CheckErr(err,"Device");
//第三步设备上下文:
cl_context context;
context=clCreateContext(NULL,1,&device,NULL,NULL,&err);
CheckErr(err,"Context");
//第四步创建命令队列
cl_command_queue command_queue;
command_queue=clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&err);
CheckErr(err,"Command_queue");
//第五步创建缓冲
cl_mem MatrixBufferA;
cl_mem MatrixBufferB;
cl_mem MatrixBufferC;
int C[BUFSIZE];
MatrixBufferA=clCreateBuffer(context,CL_MEM_READ_ONLY,DataSize,NULL,&err);
CheckErr(err,"MatrixBufferA");
MatrixBufferB=clCreateBuffer(context,CL_MEM_READ_ONLY,DataSize,NULL,&err);
CheckErr(err,"MatrixBufferB");
MatrixBufferC=clCreateBuffer(context,CL_MEM_WRITE_ONLY,DataSize,NULL,&err);
CheckErr(err,"MatrixBufferC");
//第六步数据拷贝(主机到设备)
err=clEnqueueWriteBuffer(command_queue,MatrixBufferA,CL_FALSE,0,DataSize,MatrixA,0,NULL,NULL);
CheckErr(err,"Copy Data To BufferA");
err=clEnqueueWriteBuffer(command_queue,MatrixBufferB,CL_FALSE,0,DataSize,MatrixB,0,NULL,NULL);
CheckErr(err,"Copy Data To BufferB");
//第7步编译程序
cl_program program;
program=clCreateProgramWithSource(context,1,(const char **)&KernelSource,NULL,&err);
CheckErr(err,"CreateProgramWithSource");
err=clBuildProgram(program,1,&device,NULL,NULL,NULL);
CheckErr(err,"BulidProgram");
//第8步编译内核
cl_kernel kernel;
kernel=clCreateKernel(program,"add",&err);
CheckErr(err,"CreateKernel");
//第9步设置内核参数
err=clSetKernelArg(kernel,0,sizeof(cl_mem),&MatrixBufferA);
CheckErr(err,"SetKernelArg of MatrixBufferA");
err |=clSetKernelArg(kernel,1,sizeof(cl_mem),&MatrixBufferB);
CheckErr(err,"SetKernelArg of MatrixBufferB");
err |=clSetKernelArg(kernel,2,sizeof(cl_mem),&MatrixBufferC);
CheckErr(err,"SetKernelArg of MatrixBufferC");
//第10步配置工作线程
size_t globalWorkSize;
globalWorkSize=BUFSIZE;
//第11步内核执行队列
err=clEnqueueNDRangeKernel(command_queue,kernel,1,NULL,&globalWorkSize,NULL,0,NULL,NULL);
CheckErr(err,"EnqueueNDRangeKernel");
//第12步数据拷贝(设备到主机)
err=clEnqueueReadBuffer(command_queue,MatrixBufferC,CL_TRUE,0,DataSize,C,NULL,NULL,NULL);
printf("\nGPU Result:\n");
for(i=0;i<BUFSIZE;i++)
{
printf("%d ",C[i]);
}
printf("\n");
//第13步释放资源
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(command_queue);
clReleaseMemObject(MatrixBufferA);
clReleaseMemObject(MatrixBufferB);
clReleaseMemObject(MatrixBufferC);--
clReleaseContext(context);
clReleaseDevice(device);
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值