OpenCL_CPU加速矩阵运算

本博文使用的是Intel的OpenCL实现,下载链接: https://software.intel.com/en-us/intel-opencl/download ,按默认选项安装即可。

注意:安装完毕后opencl的sdk在路径C:\Program Files (x86)\Intel\OpenCL SDK\6.3下

 

第一步:检验计算机硬件设备

安装完毕检验硬件设备,查看平台数量,代码如下:

 

#include <iostream>
#include <malloc.h>
#include <CL/cl.h>//包含CL的头文件

using namespace std;

//Maps an OpenCL device type value to a short display name.
//Only the exact values CL_DEVICE_TYPE_CPU, CL_DEVICE_TYPE_GPU and
//CL_DEVICE_TYPE_ACCELERATOR are recognised; any other value yields "DEFAULT".
const char* GetDeviceType(cl_device_type it)
{
	switch (it)
	{
	case CL_DEVICE_TYPE_CPU:
		return "CPU";
	case CL_DEVICE_TYPE_GPU:
		return "GPU";
	case CL_DEVICE_TYPE_ACCELERATOR:
		return "ACCELERATOR";
	default:
		return "DEFAULT";
	}
}

int main()
{
	char dname[512];
	cl_device_id devices[20];
	cl_platform_id* platform_id = NULL;
	cl_uint num_devices;
	cl_device_type int_type;
	cl_ulong long_entries;
	cl_uint num_platform;
	cl_int err;

	//查询系统上可用的计算平台,可以理解为初始化
	err = clGetPlatformIDs(0, NULL, &num_platform);

	if (err != CL_SUCCESS)
	{
		cout << "clGetPlatformIDs error" << endl;
		return 0;
	}

	cout << "PlatForm num:" << num_platform << endl;

	int st = 0;

	platform_id = new cl_platform_id[num_platform];

	err = clGetPlatformIDs(num_platform, platform_id, NULL);

	if (err != CL_SUCCESS)
	{
		cout << "clGetPlatformIDs error" << endl;
		return 0;
	}

	for (st = 0; st<num_platform; st++)
	{
		cout << "----------------------------------" << endl;
		cout << "Platform " << st + 1 << endl;

		//获取可用计算平台的名称
		clGetPlatformInfo(platform_id[st], CL_PLATFORM_NAME, 512, dname, NULL);
		cout << "CL_PLATFORM_NAME:" << dname << endl;

		//获取可用计算平台的版本号,即OpenCL的版本号
		clGetPlatformInfo(platform_id[st], CL_PLATFORM_VENDOR, 512, dname, NULL);
		cout << "CL_PLATFORM_VERSION:" << dname << endl;

		//获取可用计算平台的设备数目
		clGetDeviceIDs(platform_id[st], CL_DEVICE_TYPE_ALL, 20, devices, &num_devices);
		cout << "Device num:" << num_devices << endl;

		unsigned int n = 0;

		//循环两次,检测两个设备的属性
		for (n = 0; n<num_devices; n++)
		{
			cout << endl << "Device " << n + 1 << endl;
			//获取设备名称
			clGetDeviceInfo(devices[n], CL_DEVICE_NAME, 512, dname, NULL);
			cout << "Device :" << dname << endl;

			//获取设备类别
			clGetDeviceInfo(devices[n], CL_DEVICE_TYPE, sizeof(cl_device_type),
				&int_type, NULL);
			cout << "Device Type:" << GetDeviceType(int_type) << endl;

			//获取设备版本号
			clGetDeviceInfo(devices[n], CL_DRIVER_VERSION, 512, dname, NULL);
			cout << "Device version:" << dname << endl;

			//获取设备全局内存大小
			clGetDeviceInfo(devices[n], CL_DEVICE_GLOBAL_MEM_SIZE,
				sizeof(cl_ulong), &long_entries, NULL);
			cout << "Device global mem(MB):" <<
				long_entries / 1024 / 1024 << endl;

			//获取设备CACHE内存大小
			clGetDeviceInfo(devices[n], CL_DEVICE_GLOBAL_MEM_CACHE_SIZE,
				sizeof(cl_ulong), &long_entries, NULL);
			cout << "Device global mem cache(KB):" <<
				long_entries / 1024 << endl;

			//获取本地内存大小
			clGetDeviceInfo(devices[n], CL_DEVICE_LOCAL_MEM_SIZE,
				sizeof(cl_ulong), &long_entries, NULL);
			cout << "Device Locale mem(KB) :" << long_entries / 1024 << endl;

			//获取设备频率
			clGetDeviceInfo(devices[n], CL_DEVICE_MAX_CLOCK_FREQUENCY,
				sizeof(cl_ulong), &long_entries, NULL);
			cout << "Device Max clock(MHz) :" << long_entries << endl;

			//获取最大工作组数
			clGetDeviceInfo(devices[n], CL_DEVICE_MAX_WORK_GROUP_SIZE,
				sizeof(cl_ulong), &long_entries, NULL);
			cout << "Device Max Group size :" << long_entries << endl;

			//获取最大计算核心数
			clGetDeviceInfo(devices[n], CL_DEVICE_MAX_COMPUTE_UNITS,
				sizeof(cl_ulong), &long_entries, NULL);
			cout << "Device Max parallel cores:" << long_entries << endl;

		}
	}

	return 0;
}

 

 

 

 

 

如果发现CPU的OpenCL版本显示为实验版本2.1,则需要另外安装runtime配置文件,安装后即可。

配置文件链接:http://pan.baidu.com/s/1geNnMy3 密码:7n8o

再次检验硬件设备,查看平台数量,可以发现cpu的opencl版本正确。

 

第二步:矩阵运算加速(以向量加法为例)

代码如下:

核函数vecadd.cl文件如下:

 

/*
 * Element-wise addition of two int vectors: C[i] = A[i] + B[i].
 * Each work-item handles the single element at its own global id in
 * dimension 0. There is no bounds check, so the global work size of the
 * launch must not exceed the length of the three buffers.
 */
__kernel void vecAdd(__global int* A,
        __global int* B,
        __global int* C)
{
    /* Position of this work-item within the 1-D range. */
    const int gid = get_global_id(0);
    const int sum = A[gid] + B[gid];
    C[gid] = sum;
}

 

 

 

 

 

主函数main.cpp文件如下:

 

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <string>
#include <vector>
#include <CL/cl.h>//包含CL的头文件

//OpenCL 2.1
//Works around MSVC error C4996: 'clCreateCommandQueue': was declared deprecated
#pragma warning( disable : 4996 )

using namespace std;

//Number of elements in each vector (100)
#define elements 100

//从外部文件获取cl内核代码
bool GetFileData(const char* fname, string& str)
{
	FILE* fp = fopen(fname, "r");
	if (fp == NULL)
	{
		printf("no found filen");
		return false;
	}

	int n = 0;
	while (feof(fp) == 0)
	{
		str += fgetc(fp);
	}

	return true;
}

int main()
{
	//先读外部CL核心代码,如果失败则退出。
	//代码存buf_code里面
	string code_file;

	if (false == GetFileData("vecadd.cl", code_file))
	{
		return 0;
	}

	char* buf_code = new char[code_file.size()];
	strcpy(buf_code, code_file.c_str());
	buf_code[code_file.size() - 1] = NULL;

	//声明CL所需变量。
	cl_device_id device;
	cl_platform_id *platform_id = NULL;
	cl_context context;
	cl_command_queue cmdQueue;
	cl_mem bufferA, bufferB, bufferC;
	cl_program program;
	cl_kernel kernel = NULL;

	//我们使用的是一维向量
	//设定向量大小(维数)
	size_t globalWorkSize[1];
	globalWorkSize[0] = elements;

	cl_int err;

	//定义输入变量和输出变量,并设定初值
	int* buf_A = new int[elements];
	int* buf_B = new int[elements];
	int* buf_C = new int[elements];

	size_t datasize = sizeof(int) * elements;

	for (int i = 0; i < elements; i++)
	{
		buf_A[i] = (float)i;
		buf_B[i] = (float)i + 1.0;
	}

	//step 1:初始化OpenCL
	cl_uint num_platform;
	err = clGetPlatformIDs(0, NULL, &num_platform);

	platform_id = new cl_platform_id[num_platform];
	err = clGetPlatformIDs(num_platform, platform_id, NULL);

	if (err != CL_SUCCESS)
	{
		cout << "clGetPlatformIDs error" << endl;
		return 0;
	}

	//博主计算机三个plantform,platform_id[2]为CPU,根据情况来改
	clGetDeviceIDs(platform_id[2], CL_DEVICE_TYPE_CPU, 1, &device, NULL);

	//step 2:创建上下文
	context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);

	//step 3:创建命令队列
	cmdQueue = clCreateCommandQueue(context, device, 0, NULL);

	//step 4:创建数据缓冲区
	bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, NULL);
	bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, NULL);
	bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize, NULL, NULL);

	//step 5:将数据上传到缓冲区
	clEnqueueWriteBuffer(cmdQueue, bufferA, CL_FALSE, 0, datasize, buf_A, 0, NULL, NULL);
	clEnqueueWriteBuffer(cmdQueue, bufferB, CL_FALSE, 0, datasize, buf_B, 0, NULL, NULL);

	//step 6:加载编译代码,创建内核调用函数
	program = clCreateProgramWithSource(context, 1, (const char**)&buf_code, NULL, NULL);
	clBuildProgram(program, 1, &device, NULL, NULL, NULL);
	kernel = clCreateKernel(program, "vecAdd", NULL);

	//step 7:设置参数,执行内核
	clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA);
	clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB);
	clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC);

	clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);

	//step 8:取回计算结果
	clEnqueueReadBuffer(cmdQueue, bufferC, CL_TRUE, 0, datasize, buf_C, 0, NULL, NULL);

	//输出验证结果
	cout << buf_A[0] << "+" << buf_B[0] << "=" << buf_C[0] << endl;
	cout << buf_A[elements - 1] << "+" << buf_B[elements - 1] << "=" << buf_C[elements - 1] << endl;

	//释放所有调用和内存
	clReleaseKernel(kernel);
	clReleaseProgram(program);
	clReleaseCommandQueue(cmdQueue);
	clReleaseMemObject(bufferA);
	clReleaseMemObject(bufferB);
	clReleaseMemObject(bufferC);
	clReleaseContext(context);

	delete[]platform_id;
	delete[]buf_A;
	delete[]buf_B;
	delete[]buf_C;
	delete[]buf_code;
	system("pause");
	return 0;
}

 

 

 

 

 

测试结果显示如下:

 

 

任何问题请加唯一QQ2258205918(名称samylee)!

唯一VX:samylee_csdn

 

  • 2
    点赞
  • 19
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值