Chapter 11 OpenCL 案例


前言

这是 OpenCL 系列的最后一章，内容没什么特别的：实现了一个 cv::Mat 乘以标量的案例，分别基于 buffer 和 image 两种内存对象实现。


cv::Mat 乘以标量

代码中引入了 cl.h 这个头文件，可以下载对应的库，或者直接把头文件拿过来用。头文件我就不提供了，可以去 GitHub 上找找。函数本身没什么特别的，需要注意的是：使用 buffer 对象或 image 对象时，要创建与之对应的核函数（mat_mul_buf_factor 或 mat_mul_img_factor），并设置与该核函数匹配的参数。

整体而言，OpenCL 的整个启动流程比较复杂繁琐。

#include <cassert>
#include <iostream>
#include <string>

#include <CL/cl.h>

#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/opencv.hpp"

// Evaluates `call` exactly once; if the result is not CL_SUCCESS, prints the
// source location and the numeric error code to std::cerr and returns from the
// enclosing function (so it can only be used inside functions returning void).
//
// The temporary deliberately has an unusual name: the argument is frequently a
// caller variable called `err`, and a macro-local `err` would shadow it,
// turning the check into `cl_int err = err;` (a read of an uninitialised
// value) instead of a check of the caller's error code.
#define CHECK_OPENCL_ERROR(call) \
    do { \
        const cl_int opencl_check_status_ = (call); \
        if (opencl_check_status_ != CL_SUCCESS) { \
            std::cerr << "OpenCL error at " << __FILE__ << ":" << __LINE__ << " - Code: " << opencl_check_status_ << std::endl; \
            return; \
        } \
    } while (0)


// OpenCL C source, compiled at run time, containing two kernels that multiply
// pixel data by `factor`. Each kernel handles the element addressed by
// get_global_id(0) (x) and get_global_id(1) (y).
//
//  * mat_mul_buf_factor(src, dst, sw, factor): buffer variant. Computes
//    idx = y * sw + x, loads a uchar4 with vload4(idx, src), multiplies it by
//    `factor` and stores it with vstore4(result, idx, dst).
//    NOTE(review): vload4/vstore4 take the offset in units of 4 elements, so
//    idx addresses bytes 4*idx .. 4*idx+3. This presumably expects 4-channel
//    pixels with `sw` given in pixels; with one work-item per byte of a
//    single-channel matrix it would run past the buffers -- confirm before
//    enabling this variant.
//
//  * mat_mul_img_factor(src, dst, factor): image variant. Reads the texel at
//    (x, y) with read_imageui, multiplies all four components by `factor`
//    and writes the result to the same position with write_imageui.
const char *kernelSource = R"(
    __kernel void mat_mul_buf_factor(__global const uchar* src,
                                        __global uchar* dst,
                                        const int sw,
                                        const uchar factor) {
        const int x = get_global_id(0); // x direction
        const int y = get_global_id(1); // y direction
        // const int w = get_global_size(0); // get cv::Mat cols
        // const int h = get_global_size(1); // get cv::Mat rows
        int idx = y * sw + x;

        uchar4 pixel = vload4(idx, src);
        uchar4 result = pixel * factor;
        vstore4(result, idx, dst);
    }

    __kernel void mat_mul_img_factor(__read_only image2d_t src,
                              __write_only image2d_t dst,
                              const uchar factor) {
        const int2 pos = {get_global_id(0), get_global_id(1)};
        uint4 pixel = read_imageui(src, pos);
        uint4 result = pixel * factor;
        write_imageui(dst, pos, result);
    }
)";

void startup(cv::InputArray& _src, cv::OutputArray& _dst) {
    cv::Size ssize = _src.size();
    assert(!ssize.empty());

    cv::Mat src = _src.getMat();
    _dst.create(ssize, src.type());
    cv::Mat dst = _dst.getMat();
    int sw = ssize.width;
    uchar factor = 2;

    cl_int err;
    // Get platform and device information
    cl_platform_id platform;
    clGetPlatformIDs(1, &platform, NULL);

    cl_device_id device;
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);

    // 创建 OpenCL 上下文
    cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
    CHECK_OPENCL_ERROR(err);

    // Create command queue
    cl_command_queue queue = clCreateCommandQueue(context, device, 0, &err);
    CHECK_OPENCL_ERROR(err);

    // 基于 buf
    // cl_mem srcBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(uchar) * src.total() * src.channels(), src.data, &err);
    // CHECK_OPENCL_ERROR(err);
    // cl_mem dstBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(uchar) * dst.total() * dst.channels(), dst.data, &err);
    // CHECK_OPENCL_ERROR(err);

    // 基于 img
    cl_image_format imageFormat;
    imageFormat.image_channel_order = CL_R;
    imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;
    cl_mem srcBuffer = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                       &imageFormat, src.cols, src.rows, 0, src.data, &err);
    CHECK_OPENCL_ERROR(err);
    cl_mem dstBuffer = clCreateImage2D(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
                                       &imageFormat, dst.cols, dst.rows, 0, dst.data, &err);
    CHECK_OPENCL_ERROR(err);

    // Create OpenCL program from source code
    cl_program program = clCreateProgramWithSource(context, 1, &kernelSource, NULL, &err);
    CHECK_OPENCL_ERROR(err);

    // Build the program
    err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    if (err != CL_SUCCESS) {
        size_t logSize;
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
        char *log = (char *)malloc(logSize);
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
        fprintf(stderr, "OpenCL program build log:\n%s", log);
        free(log);
        CHECK_OPENCL_ERROR(err);  // 抛出错误
    }

    // Create the OpenCL kernel
    cl_kernel kernel = clCreateKernel(program, "mat_mul_img_factor", &err);
    CHECK_OPENCL_ERROR(err);

    // Set the kernel arguments
    clSetKernelArg(kernel, 0, sizeof(cl_mem), &srcBuffer);  // src
    clSetKernelArg(kernel, 1, sizeof(cl_mem), &dstBuffer);  // dst
    // 基于 buf
    // clSetKernelArg(kernel, 2, sizeof(int), &sw);            // sw - replace with the actual value
    // clSetKernelArg(kernel, 3, sizeof(uchar), &factor);      // factor - replace with the actual value
    // 基于 img
    clSetKernelArg(kernel, 2, sizeof(uchar), &factor);      // factor - replace with the actual value

    // Execute the kernel
    size_t globalSize[] = {static_cast<size_t>(dst.cols), static_cast<size_t>(dst.rows)};
    cl_event event;
    clEnqueueNDRangeKernel(queue, kernel, 2, NULL, globalSize, NULL, 0, NULL, &event);
    clFinish(queue);

    // 基于 buffer
    // err = clEnqueueReadBuffer(queue, dstBuffer, CL_TRUE, 0, sizeof(uchar) * dst.total() * dst.channels(), (void *)dst.data, 0, NULL, NULL);
    // 基于 image
    size_t origin[] = {0, 0, 0}; // Assuming 3D image
    size_t region[] = {static_cast<size_t>(dst.cols), static_cast<size_t>(dst.rows), 1}; // Assuming 3D image
    err = clEnqueueReadImage(queue, dstBuffer, CL_TRUE, origin, region, 0, 0, (void *)dst.data, 0, NULL, NULL);
    CHECK_OPENCL_ERROR(err);

    // Check if the kernel is executed successfully
    cl_int status;
    clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL);
    if (status == CL_COMPLETE) {
        std::cout << "Kernel execution completed successfully." << std::endl;
    } else {
        std::cerr << "Kernel execution failed." << std::endl;
    }
    clReleaseEvent(event);
    // Release OpenCL objects
    clReleaseMemObject(srcBuffer);
    clReleaseMemObject(dstBuffer);
    clReleaseProgram(program);
    clReleaseKernel(kernel);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
}

int main() {
    cv::Mat mat_16S(8, 8, CV_8UC1);
    // 随机生成值
    cv::RNG rnger(cv::getTickCount());
    rnger.fill(mat_16S, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(10));
    cv::Mat dst;

    std::cout << "mat_16S:\n" << mat_16S << std::endl;
    startup(mat_16S, dst);
    std::cout << "dst:\n" << dst << std::endl;

    return 0;
}


总结

最后附上一个供大家学习 OpenCL API 的网站：这是 OpenCL 的官方网站，可以在上面查询各个 API 的调用方法。

OpenCL 官网

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值