OpenCL Intel 平台编译 Demo 环境（VS Code + CMake）

最新推荐文章于 2024-05-15 09:56:47 发布

文森88

最新推荐文章于 2024-05-15 09:56:47 发布

阅读量1.4k

点赞数

分类专栏：个人语言基础

本文链接：https://blog.csdn.net/zengyubao1/article/details/112725496

版权

语言基础同时被 2 个专栏收录

19 篇文章 0 订阅

订阅专栏

个人

8 篇文章 0 订阅

订阅专栏

资源

下载开发软件：
https://software.intel.com/content/www/us/en/develop/articles/opencl-drivers.html?wapkw=opencl

Intel® CPU Runtime for OpenCL™ Applications 18.1 for Windows* OS (64bit or 32bit)

https://fpgasoftware.intel.com/opencl/19.1/?edition=pro
面向OpenCL的Intel SDK编程指南

OpenCL Windows下使用OpenCL
https://blog.csdn.net/ytffhew/article/details/83722732

重要参考

OpenCL入门一：Intel核心显卡OpenCL环境搭建
https://blog.csdn.net/asmartkiller/article/details/86095773

安装

w_opencl_runtime_p_2021.1.1.191.exe
C:\Program Files (x86)\Common Files\Intel\OpenCL

intel_sdk_for_opencl_applications_2020.3.494.zip
C:\Program Files (x86)\IntelSWTools\system_studio_2020

头文件位置：
C:\Program Files (x86)\IntelSWTools\system_studio_2020\OpenCL\sdk\include\CL
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-32Zu4S97-1610804754639)(/images/1/138/1610793968905.png)]

代码

在 Windows 下（其它操作系统大概也一样），头文件的包含方式都是：
#include <CL/cl.h>

共享库
OpenCL.dll OpenCL.lib

Cmake编译

# Build script for the `capsbasic` OpenCL demo against the Intel OpenCL SDK
# installed under Intel System Studio 2020 on Windows.
cmake_minimum_required(VERSION 3.0.0)
project(capsbasic VERSION 0.1.0)

include(CTest)
enable_testing()

add_executable(capsbasic capsbasic.cpp)

# Directory that contains the CL/ header folder (so that <CL/cl.h> resolves).
# Spaces need no escaping inside a quoted CMake argument.
set(OPENCL_SDK_INCLUDE_DIR "C:/Program Files (x86)/IntelSWTools/system_studio_2020/OpenCL/sdk/include/")
include_directories(./ "${OPENCL_SDK_INCLUDE_DIR}")

# Directory that contains the 64-bit import libraries of the SDK.
set(OPENCL_SDK_LIB_DIR "C:/Program Files (x86)/IntelSWTools/system_studio_2020/OpenCL/sdk/lib/x64")

# Collect every .lib file in the SDK library directory and link against all of them.
file(GLOB test_LIBS "${OPENCL_SDK_LIB_DIR}/*.lib")
if(NOT test_LIBS)
    # Fail at configure time instead of producing unresolved-symbol link errors later.
    message(FATAL_ERROR "No .lib files found in ${OPENCL_SDK_LIB_DIR}")
endif()
target_link_libraries(capsbasic ${test_LIBS})

message("LOGD ${test_LIBS}")

set(CPACK_PROJECT_NAME ${PROJECT_NAME})
set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})

include(CPack)

源代码

环境：
intel GPU
opencl 1.2

重要修改点：

#define CL_TARGET_OPENCL_VERSION 120

    const int elements = 512; // query my GPU's CL_DEVICE_MAX_WORK_GROUP_SIZE is 512

    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufA);
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufB);
    status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufC);

示例代码:

#define CL_TARGET_OPENCL_VERSION 120
#include <cstdlib>
#include <iostream>
#include <iomanip>
#include <cstring>
#include <cassert>

#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.h>
#include <fstream>
#include <string>
#include <vector>

using namespace std;

/**
 * Abort the program when an OpenCL call did not succeed.
 *
 * ERR is evaluated exactly once. If it differs from CL_SUCCESS, the error
 * code together with the file and line of the macro invocation is written to
 * cerr and the process terminates via exit(1).
 *
 * The body is wrapped in do { ... } while (0) so the macro behaves like a
 * single statement, including inside an unbraced if/else.
 * It relies on `cerr` and `exit` being visible unqualified at the call site.
 */
#define OPENCL_CHECK_ERRORS(ERR)                                  \
    do {                                                          \
        const cl_int opencl_check_err_ = (ERR);                   \
        if (opencl_check_err_ != CL_SUCCESS)                      \
        {                                                         \
            cerr                                                  \
            << "OpenCL error with code " << opencl_check_err_     \
            << " happened in file " << __FILE__                   \
            << " at line " << __LINE__                            \
            << ". Exiting...\n";                                  \
            exit(1);                                              \
        }                                                         \
    } while (0)

// Capacity of the device-ID array that opencl_test_vec_add() allocates and
// passes to clGetDeviceIDs as the maximum number of entries to return.
#define MAX_DETECT_NUM (128)


// OpenCL C source of the `vecadd` kernel: each work-item adds one pair of
// elements from A and B and stores the sum in C at the same index.
// Built from adjacent string literals; every piece ends with the newline and
// the single trailing space that the kernel text carries after each line.
const char *programSource =
    "__kernel \n "
    "void vecadd(__global int *A, \n "
    "__global int *B, \n "
    "__global int *C) \n "
    "{ \n "
    "// Get the work-item's unique ID \n "
    "int idx = get_global_id(0); \n "
    "\n "
    "// Add the corresponding locations of \n "
    "// 'A' and 'B', and store the reasult in 'C' \n "
    "C[idx] = A[idx] + B[idx]; \n "
    "} \n";

/**
 * vec_add : bufA + bufB = bufC
 */

int opencl_test_vec_add() {
    cout << "opencl_test E" << endl;
    // This code executes on the OpenCL host
    // Elements in each array
    const int elements = 512; // query my GPU's CL_DEVICE_MAX_WORK_GROUP_SIZE is 512
    // Compute the size of the data
    size_t datasize = sizeof(int) * elements;
    // Allocate space for input/output host data
    int *A = (int *)malloc(datasize); // Input array
    int *B = (int *)malloc(datasize); // Input array
    int *C = (int *)malloc(datasize); // Output array
    // Initialize the input data
    int i;
    for (i = 0; i < elements; i++){
        A[i] = i;
        B[i] = i;
        C[i] = 0;
    }
    // Use this to check the output of each API call
    cl_int status;
    // Get the first platforms
    cl_platform_id platform;
    status = clGetPlatformIDs(1, &platform, NULL);
    OPENCL_CHECK_ERRORS(status);
    // Get the first devices
    //cl_device_id device = new cl_device_id[3];
    cl_device_id* device_ids = new cl_device_id[MAX_DETECT_NUM];
    memset(device_ids, 0, MAX_DETECT_NUM * sizeof(cl_device_id));
    cl_uint real_devices_num = 0;
    status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, MAX_DETECT_NUM, device_ids, &real_devices_num);
    if (real_devices_num != 0) {
        cout << "find cl devices cnt:" << real_devices_num << endl;
        cout << "device_ids[0]:" << device_ids[0] <<endl;
        cout << "device_ids[1]:" << device_ids[1] <<endl;
    }
    OPENCL_CHECK_ERRORS(status);
    // Create a context and associate it with the device
    cl_context context = clCreateContext(NULL, real_devices_num, device_ids, NULL, NULL, &status);
    OPENCL_CHECK_ERRORS(status);
    // Create a command-queue and associate it with device
    // cl_command_queue cmdQueue = clCreateCommandQueueWithProperties(context, device_ids[0], NULL, &status);   //for opencl v2.0
    cl_command_queue_properties properties = 0; //CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
    cl_command_queue cmdQueue = clCreateCommandQueue(context, device_ids[0], properties, &status);              //for opencl v1.2

    OPENCL_CHECK_ERRORS(status);
    // Allocate two input buffers and one output buffer for the three vectors in the vector addition
    cl_mem bufA = clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &status);
    cl_mem bufB = clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &status);
    cl_mem bufC = clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &status);
    OPENCL_CHECK_ERRORS(status);
    // Write data from the input arrays to the buffers
    status = clEnqueueWriteBuffer(cmdQueue, bufA, CL_TRUE, 0, datasize, A, 0, NULL, NULL);
    status = clEnqueueWriteBuffer(cmdQueue, bufB, CL_TRUE, 0, datasize, B, 0, NULL, NULL);
    OPENCL_CHECK_ERRORS(status);
    // Create a program with source code
    cl_program program = clCreateProgramWithSource(context, 1, (const char**)&programSource, NULL, &status);
    OPENCL_CHECK_ERRORS(status);
    // Build(compile) the program for the device
    status = clBuildProgram(program, real_devices_num, device_ids, NULL, NULL, NULL);
    OPENCL_CHECK_ERRORS(status);
    // Create the vector addition kernel
    cl_kernel kernel = clCreateKernel(program, "vecadd", &status);
    OPENCL_CHECK_ERRORS(status);
    // Set the kernel arguments
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufA);
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufB);
    status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufC);
    OPENCL_CHECK_ERRORS(status);
    // Define an incde space of work-items for execution
    // A work-group size is not required, but can be used.
    size_t indexSpaceSize[1], workGroupSize[1];
    // There are 'elements' work-items
    indexSpaceSize[0] = elements;
    workGroupSize[0] = elements;

    // Execute the kernel
    status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, indexSpaceSize, workGroupSize, 0, NULL, NULL);
    OPENCL_CHECK_ERRORS(status);

    // Read the device output buffer to the host output array
    status = clEnqueueReadBuffer(cmdQueue, bufC, CL_TRUE, 0, datasize, C, 0, NULL, NULL);
    OPENCL_CHECK_ERRORS(status);
    // Free OpenCL resouces
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(cmdQueue);
    clReleaseMemObject(bufA);
    clReleaseMemObject(bufB);
    clReleaseMemObject(bufC);
    clReleaseContext(context);
    // out the result
    cout << "A:" << endl;
    for (int i = 0; i < elements; i++) {
        cout << A[i] <<" ";
    }
    cout <<endl;
    cout << "B:" << endl;
    for (int i = 0; i < elements; i++) {
        cout << B[i] <<" ";
    }
    cout <<endl;
    cout << "C:" << endl;
    for (int i = 0; i < elements; i++) {
        cout << C[i] <<" ";
    }
    cout <<endl;
    // free host resouces
    free(A);
    free(B);
    free(C);
    delete [] device_ids;
    cout << "opencl_test X" << endl;
    return 0;
}

提交到github：
https://github.com/vicentzeng/opencl_demo

文森88

关注

0
点赞
踩
7

收藏

觉得还不错? 一键收藏
0
评论
OpenCL Intel 平台编译 Demo 环境（VS Code + CMake）

下载开发软件：https://software.intel.com/content/www/us/en/develop/articles/opencl-drivers.html?wapkw=openclIntel® CPU Runtime for OpenCL™ Applications 18.1 for Windows* OS (64bit or 32bit)https://fpgasoftware.intel.com/opencl/19.1/?edition=pro面向OpenCL的Intel
复制链接

扫一扫