WIN8 + AMD 下OpenCL 开发环境布置

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/zg260/article/details/79271954
1:下载AMD APP SDK 3.0 https://developer.amd.com/amd-accelerated-parallel-processing-app-sdk/

2:安装(过程可能比较慢),完成后重启。

3:运行->cmd,在命令行窗口中依次执行 echo %AMDAPPSDKROOT% 和 echo %PATH%,查看OpenCL环境变量是否存在;
   如果能输出SDK的安装路径,说明安装成功。

4: 新建VS2013工程, 配置项目属性:
   C/C++->General->Additional Include Directories->$(AMDAPPSDKROOT)include
   Linker->General->Additional Library Directories->$(AMDAPPSDKROOT)lib\x86_64(x64工程使用lib\x86_64;Win32工程请改用lib\x86)
   Linker->Input->Additional Dependencies->OpenCL.lib
   
5: OpenCL_HelloWorld的代码(查看OpenCL版本): 

#include "stdafx.h"
#include <stdlib.h>
#include <string.h>
#include "CL/cl.h"

int _tmain(int argc, _TCHAR* argv[])
{
	cl_platform_id *platforms; 

	cl_uint num_platforms; 

	cl_int i, err, platform_index = -1;

	err = clGetPlatformIDs(0, NULL, &num_platforms);

	if (err < 0)
	{
		perror("Couldn't find any platforms\n");
		return 1; 
	}

	printf("I have platforms: %d\n", num_platforms); 

	platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * num_platforms);

	clGetPlatformIDs(num_platforms, platforms, NULL);

	for (i = 0; i < num_platforms; i++)
	{
		err = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, NULL);
		if (err < 0)
		{
			perror("Couldn't read extension data.\n");
			return 1;
		}

		char *name = (char*)malloc(255);
		clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 255, name, NULL);
		printf("Platform%d name: %s\n", i, name);

		char *vendor = (char*)malloc(255);
		clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, 255, vendor, NULL);
		printf("Platform%d vendor: %s\n", i, vendor);

		char *version = (char*)malloc(255);
		clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, 255, version, NULL);
		printf("Platform%d version: %s\n", i, version);


	    platform_index = i;

		free(name);
		free(vendor);
		free(version);
   	}

	free(platforms);

	getchar();

	return 0;
}

6: OpenCL_HelloWorld的代码(加载并运行一个.cl内核文件;需要自行准备HelloWorld_Kernel.cl,其中的入口内核函数名为helloworld,并把代码中的文件路径改成自己的实际路径)

#include "stdafx.h"

#include <string>
#include <iostream>
#include <fstream>

#include "CL/cl.h"

using namespace std; 

#pragma warning( disable : 4996 )

cl_int convert_string(const char *pfile, std::string &str);

int _tmain(int argc, _TCHAR* argv[])
{
	cl_int          iStatus = 0;                // 函数返回状态  
	cl_uint         uiNumPlatforms = 0;         // 平台个数  
	cl_platform_id  Platform = NULL;            // 选择的平台  
	size_t          uiSize = 0;                 // 平台版本名字字节数      
	cl_int          iErr = 0;                   // 返回参数  
	char            *pName = NULL;              // 平台版本名  
	cl_uint         uiNumDevices = 0;           // 设备数量  
	cl_device_id    *pDevices = NULL;           // 设备  
	cl_context      Context = NULL;             // 设备环境  
	cl_command_queue    CommandQueue = NULL;    // 命令队列  
	const char      *pFileName = "HelloWorld_Kernel.cl";    // cl文件名
	const char      *pFileName1 = "D:\\project\\learn\\hell_cl\\x64\\Debug\\HelloWorld_Kernel.cl";
	string          strSource = "";             // 用于存储cl文件中的代码  
	const char      *pSource;                   // 代码字符串指针  
	size_t          uiArrSourceSize[] = { 0 };  // 代码字符串长度  
	cl_program      Program = NULL;             // 程序对象  
	const char      *pInput = "gdkknvnqkc";     // 输入字符串  
	size_t          uiStrlength = strlen(pInput);   // 输入字符串长度  
	char            *pOutput = NULL;                // 输出字符串  
	cl_mem          memInutBuffer = NULL;           // 输入内存对象  
	cl_mem          memOutputBuffer = NULL;         // 输出内存对象  
	cl_kernel       Kernel = NULL;                  // 内核对象  
	size_t          uiGlobal_Work_Size[1] = { 0 };  // 用于设定内核分布 



	//-------------------1. 获得并选择可用平台-----------------------------  
	// 查询可用的平台个数,并返回状态  
	iStatus = clGetPlatformIDs(0, NULL, &uiNumPlatforms);
	if (CL_SUCCESS != iStatus)
	{
		cout << "Error: Getting platforms error" << endl;
		return 0;
	}


	// 获得平台地址  
	if (uiNumPlatforms > 0)  // 如果有可用平台  
	{
		// 根据平台数为平台分配内存空间  
		cl_platform_id *pPlatforms = (cl_platform_id *)malloc(uiNumPlatforms * sizeof(cl_platform_id));

		// 获得可用的平台  
		iStatus = clGetPlatformIDs(uiNumPlatforms, pPlatforms, NULL);
		Platform = pPlatforms[0];   // 获得第一个平台的地址  
		free(pPlatforms);           // 释放平台占用的内存空间  
	}

	// 获得平台版本名  
	// 获得平台版本名的字节数  
	iErr = clGetPlatformInfo(Platform, CL_PLATFORM_VERSION, 0, NULL, &uiSize);

	//根据字节数为平台版本名分配内存空间  
	pName = (char *)alloca(uiSize * sizeof(char));

	// 获得平台版本名字  
	iErr = clGetPlatformInfo(Platform, CL_PLATFORM_VERSION, uiSize, pName, NULL);
	cout << pName << endl;

	//--------------2. 查询GPU设备,并选择可用设备------------------------  
	// 获得GPU设备数量  
	iStatus = clGetDeviceIDs(Platform, CL_DEVICE_TYPE_GPU, 0, NULL, &uiNumDevices);
	if (0 == uiNumDevices)  // 如果没有GPU设备  
	{
		cout << "No GPU device available." << endl;
		cout << "Choose CPU as default device." << endl;

		// 选择CPU作为设备,获得设备数  
		iStatus = clGetDeviceIDs(Platform, CL_DEVICE_TYPE_CPU, 0, NULL, &uiNumDevices);

		// 为设备分配空间  
		pDevices = (cl_device_id *)malloc(uiNumDevices * sizeof(cl_device_id));

		// 获得平台  
		iStatus = clGetDeviceIDs(Platform, CL_DEVICE_TYPE_CPU, uiNumDevices, pDevices, NULL);
	}
	else
	{
		pDevices = (cl_device_id *)malloc(uiNumDevices * sizeof(cl_device_id));

		iStatus = clGetDeviceIDs(Platform, CL_DEVICE_TYPE_GPU, uiNumDevices, pDevices, NULL);
	}


	// -------------------3.创建设备环境---------------------------------  
	// 创建设备环境  
	Context = clCreateContext(NULL, 1, pDevices, NULL, NULL, NULL);
	if (NULL == Context)
	{
		cout << "Error: Can not create context" << endl;
		return 0;
	}

	// -------------------4.创建命令队列--------------------------------------  
	// 创建第1个设备的命令队列  
	CommandQueue = clCreateCommandQueue(Context, pDevices[0], 0, NULL);
	if (NULL == CommandQueue)
	{
		cout << "Error: Can not create CommandQueue" << endl;
		return 0;
	}

	// ----------------------5. 创建程序对象------------------------------  
	// 将cl文件中的代码转为字符串  
	iStatus = convert_string(pFileName1, strSource);

	pSource = strSource.c_str();            // 获得strSource指针  
	uiArrSourceSize[0] = strlen(pSource);   // 字符串大小  

	// 创建程序对象  
	Program = clCreateProgramWithSource(Context, 1, &pSource, uiArrSourceSize, NULL);
	if (NULL == Program)
	{
		cout << "Error: Can not create program" << endl;
		return 0;
	}

	// -----------------------------6. 编译程序--------------------------------  
	// 编译程序  
	iStatus = clBuildProgram(Program, 1, pDevices, NULL, NULL, NULL);
	if (CL_SUCCESS != iStatus)  // 编译错误  
	{
		cout << "Error: Can not build program" << endl;
		char szBuildLog[16384];
		clGetProgramBuildInfo(Program, *pDevices, CL_PROGRAM_BUILD_LOG, sizeof(szBuildLog), szBuildLog, NULL);

		cout << "Error in Kernel: " << endl << szBuildLog;
		clReleaseProgram(Program);

		return 0;
	}

	//-------------------------7. 并创建输入输出内核内存对象--------------------------------  

	// 创建输入内存对象  
	memInutBuffer = clCreateBuffer(
		Context,
		CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,  // 输入内存为只读,并可以从宿主机内存复制到设备内存  
		(uiStrlength + 1) * sizeof(char),         // 输入内存空间大小  
		(void *)pInput,
		NULL);

	// 创建输出内存对象  
	memOutputBuffer = clCreateBuffer(
		Context,
		CL_MEM_WRITE_ONLY,                  // 输出内存只能写  
		(uiStrlength + 1) * sizeof(char),   // 输出内存空间大小  
		NULL,
		NULL);

	if ((NULL == memInutBuffer) || (NULL == memOutputBuffer))
	{
		cout << "Error creating memory objects" << endl;
		return 0;
	}

	//--------------------------8. 创建内核对象-------------------------------------  
	Kernel = clCreateKernel(Program,
		"helloworld",  // cl文件中的入口函数  
		NULL);
	if (NULL == Kernel)
	{
		cout << "Error: Can not create kernel" << endl;
		return 0;
	}



	//----------------------------9. 设置内核参数----------------------------------  
	iStatus = clSetKernelArg(Kernel,
		0,      // 参数索引  
		sizeof(cl_mem),
		(void *)&memInutBuffer);

	iStatus |= clSetKernelArg(Kernel, 1, sizeof(cl_mem), (void *)&memOutputBuffer);

	if (CL_SUCCESS != iStatus)
	{
		cout << "Error setting kernel arguments" << endl;
	}

	// --------------------------10.运行内核---------------------------------  
	uiGlobal_Work_Size[0] = uiStrlength;  // 输入字符串大小  

	// 利用命令队列使将再设备上执行的内核排队  
	iStatus = clEnqueueNDRangeKernel(
		CommandQueue,
		Kernel,
		1,
		NULL,
		uiGlobal_Work_Size,  // 确定内核在设备上的多个处理单元间的分布  
		NULL,                // 确定内核在设备上的多个处理单元间的分布  
		0,
		NULL,
		NULL);


	if (CL_SUCCESS != iStatus)
	{
		cout << "Error: Can not run kernel" << endl;
		return 0;
	}

	// ----------------------------11. 将输出读取到主机内存  
	pOutput = (char *)malloc(uiStrlength + 1);  // uiStrlength  输入字符串长度  

	iStatus = clEnqueueReadBuffer(
		CommandQueue,       // 命令队列  
		memOutputBuffer,    // 输出内存对象  
		CL_TRUE,            // 内核读取结束之前该函数不会返回  
		0,
		uiStrlength * sizeof(char),
		pOutput,
		0,
		NULL,
		NULL);

	if (CL_SUCCESS != iStatus)
	{
		cout << "Error: Can not reading result buffer" << endl;
		return 0;
	}


	// ---------------------12--输出计算结果---------------  
	pOutput[uiStrlength] = '\0';
	cout << "Input String:" << endl;
	cout << pInput << endl;
	cout << "Output String:" << endl;
	cout << pOutput << endl;


	// -------------------------------13. 释放资源--------------------------------  
	iStatus = clReleaseKernel(Kernel);
	iStatus = clReleaseProgram(Program);
	iStatus = clReleaseMemObject(memInutBuffer);
	iStatus = clReleaseMemObject(memOutputBuffer);
	iStatus = clReleaseCommandQueue(CommandQueue);
	iStatus = clReleaseContext(Context);

	if (NULL != pOutput)
	{
		free(pOutput);
		pOutput = NULL;
	}

	if (NULL != pDevices)
	{
		free(pDevices);
		pDevices = NULL;
	}

	getchar();

}


// 将cl文件代码转为字符串  
cl_int convert_string(const char *pFileName, std::string &Str)
{
	size_t      uiSize = 0;
	size_t      uiFileSize = 0;
	char        *pStr = NULL;
	std::fstream fFile(pFileName, (std::fstream::in | std::fstream::binary));


	if (fFile.is_open())
	{
		fFile.seekg(0, std::fstream::end);
		uiSize = uiFileSize = (size_t)fFile.tellg();  // 获得文件大小  
		fFile.seekg(0, std::fstream::beg);
		pStr = new char[uiSize + 1];

		if (NULL == pStr)
		{
			fFile.close();
			return 0;
		}

		fFile.read(pStr, uiFileSize);               // 读取uiFileSize字节  
		fFile.close();
		pStr[uiSize] = '\0';
		Str = pStr;

		delete[] pStr;

		return 0;
	}

	cout << "Error: Failed to open cl file\n:" << pFileName << endl;

	return -1;
}




  

            
阅读更多
想对作者说点什么?

博主推荐

换一批

没有更多推荐了,返回首页