opencl开发环境配置

最新推荐文章于 2023-12-08 15:05:24 发布

算法工程师000001

最新推荐文章于 2023-12-08 15:05:24 发布

阅读量1.8k

点赞数

分类专栏： opencl 文章标签： opencl 环境配置

本文链接：https://blog.csdn.net/u012361418/article/details/46440327

版权

opencl 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

我的机器里面装有nvidia独显和intel集显，sdk使用intel的开发包。

1.首先是下载intel sdk，地址为：

http://registrationcenter.intel.com/irc_nas/4665/intel_sdk_for_ocl_applications_2014_x64_setup.msi

下载后直接点击安装。

2.配置vs（我用的是vs2010 64位）

新建一个控制台项目，然后在属性页里面进行配置：

在属性页里面找到“C/C++”的“常规”项，点击，右边有“附加包含目录”，然后编辑，添加目录：../include

在属性页里面找到“链接器”，点击其“常规”项，右边有“附加库目录”，然后也是编辑，添加目录：../lib/x64

在“链接器”的“输入”项中，找到右边的“附加依赖项”，编辑，添加lib文件：OpenCL.lib

3.测试用例

3.1新建一个opencl文件 cl_kernel.cl 代码为

// Element-wise integer addition.
// Each work-item takes its global id in dimension 0 as an element index and
// stores src1[index] + src2[index] into dst[index].
__kernel void MyCLAdd(__global int *dst, __global int *src1, __global int *src2)
{
    const int gid = get_global_id(0);   // index of the element this work-item handles
    const int lhs = src1[gid];
    const int rhs = src2[gid];
    dst[gid] = lhs + rhs;
}

3.2新建一个cpp文件 test.cpp 代码为

#include <cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <vector>
using namespace std;
#pragma warning( disable : 4996 )
void DisplayPlatformInfo(cl_platform_id id, cl_platform_info name, char *str){
cl_int errNum;
std::size_t paramValueSize;
errNum = clGetPlatformInfo(id, name, 0, NULL, ¶mValueSize);
if (errNum != CL_SUCCESS){
std::cerr << "Failed to find OpenCL platform ";
std::cerr << str << "." << endl;
return;
}
char *info = (char *)alloca(sizeof(char)*paramValueSize);
errNum = clGetPlatformInfo(id, name, paramValueSize, info, NULL);
if (errNum != CL_SUCCESS){
std::cerr << "Failed to find OpenCL platform " << str << "." << std::endl;
return;
}
std::cout << "\t" << str << ":\t" << info << std::endl;
}
int main(void)
{
cl_uint numPlatforms = 0;           //the NO. of platforms
cl_platform_id platform = nullptr;  //the chosen platform
cl_context context = nullptr;       // OpenCL context
cl_command_queue commandQueue = nullptr;
cl_program program = nullptr;       // OpenCL kernel program object that'll be running on the compute device
cl_mem input1MemObj = nullptr;      // input1 memory object for input argument 1
cl_mem input2MemObj = nullptr;      // input2 memory object for input argument 2
cl_mem outputMemObj = nullptr;      // output memory object for output
cl_kernel kernel = nullptr;         // kernel object


cl_int    status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS)
{
cout << "Error: Getting platforms!" << endl;
return 0;
}


/*For clarity, choose the first available platform. */
if (numPlatforms > 0)
{
cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms* sizeof(cl_platform_id));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
for (cl_uint i = 0; i < numPlatforms; i++){
DisplayPlatformInfo(platforms[i], CL_PLATFORM_NAME, "cl_platform_name");
DisplayPlatformInfo(platforms[i], CL_PLATFORM_PROFILE, "cl_platform_profile");
DisplayPlatformInfo(platforms[i], CL_PLATFORM_VERSION, "cl_platform_version");
DisplayPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, "cl_platform_vendor");
DisplayPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, "cl_platform_extensions");

}
platform = platforms[0];
free(platforms);
}
else
{
puts("Your system does not have any OpenCL platform!");
return 0;
}


/*Step 2:Query the platform and choose the first GPU device if has one.Otherwise use the CPU as device.*/
cl_uint                numDevices = 0;
cl_device_id        *devices;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if (numDevices == 0) //no GPU available.
{
cout << "No GPU device available." << endl;
cout << "Choose CPU as default device." << endl;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &numDevices);
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));


status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, numDevices, devices, NULL);
}
else
{
devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
cout << "The number of devices: " << numDevices << endl;

}


/*Step 3: Create context.*/
context = clCreateContext(NULL, 1, devices, NULL, NULL, NULL);


/*Step 4: Creating command queue associate with the context.*/
commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);


/*Step 5: Create program object */
// Read the kernel code to the buffer
FILE *fp = fopen("cl_kernel.cl", "rb");
if (fp == nullptr)
{
puts("The kernel file not found!");
goto RELEASE_RESOURCES;
}
fseek(fp, 0, SEEK_END);
size_t kernelLength = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *kernelCodeBuffer = (char*)malloc(kernelLength + 1);
fread(kernelCodeBuffer, 1, kernelLength, fp);
kernelCodeBuffer[kernelLength] = '\0';
fclose(fp);


const char *aSource = kernelCodeBuffer;
program = clCreateProgramWithSource(context, 1, &aSource, &kernelLength, NULL);


/*Step 6: Build program. */
status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);


/*Step 7: Initial inputs and output for the host and create memory objects for the kernel*/
int __declspec(align(32)) input1Buffer[128];    // 32 bytes alignment to improve data copy
int __declspec(align(32)) input2Buffer[128];
int __declspec(align(32)) outputBuffer[128];


// Do initialization
int i;
for (i = 0; i < 128; i++)
input1Buffer[i] = input2Buffer[i] = i + 1;
memset(outputBuffer, 0, sizeof(outputBuffer));


// Create mmory object
input1MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 128 * sizeof(int), input1Buffer, nullptr);
input2MemObj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 128 * sizeof(int), input2Buffer, nullptr);
outputMemObj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 128 * sizeof(int), NULL, NULL);


/*Step 8: Create kernel object */
kernel = clCreateKernel(program, "MyCLAdd", NULL);


/*Step 9: Sets Kernel arguments.*/
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&outputMemObj);
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&input1MemObj);
status = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&input2MemObj);


/*Step 10: Running the kernel.*/
size_t global_work_size[1] = { 128 };
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
clFinish(commandQueue);     // Force wait until the OpenCL kernel is completed


/*Step 11: Read the cout put back to host memory.*/
status = clEnqueueReadBuffer(commandQueue, outputMemObj, CL_TRUE, 0, global_work_size[0] * sizeof(int), outputBuffer, 0, NULL, NULL);


printf("Veryfy the rsults... ");
for (i = 0; i < 128; i++)
{
if (outputBuffer[i] != (i + 1) * 2)
{
puts("Results not correct!");
break;
}
}
if (i == 128)
puts("Correct!");


RELEASE_RESOURCES:
/*Step 12: Clean the resources.*/
status = clReleaseKernel(kernel);//*Release kernel.
status = clReleaseProgram(program);    //Release the program object.
status = clReleaseMemObject(input1MemObj);//Release mem object.
status = clReleaseMemObject(input2MemObj);
status = clReleaseMemObject(outputMemObj);
status = clReleaseCommandQueue(commandQueue);//Release  Command queue.
status = clReleaseContext(context);//Release context.


free(devices);
int t;
std::cin >> t;
}

4.成功。