文章目录
前言
这是 OpenCL 系列的最后一章,内容没什么特别的:实现了 cv::Mat 乘以一个标量,并分别基于 buffer 和 image 两种内存对象各实现了一遍。
cv::Mat 乘以标量
代码中引入了 CL/cl.h 这个头文件,可以安装对应的 OpenCL 库,或者直接把头文件拿过来用。头文件我就不提供了,可以去 GitHub 上找找。函数本身没什么特别的,需要注意的是:使用 buffer 对象还是 image 对象,要创建与之对应的核函数(buffer 对应 mat_mul_buf_factor,image 对应 mat_mul_img_factor),并设置对应的核函数参数。
整体而言,OpenCL 的整个启动流程比较复杂繁琐。
#include <cassert>
#include <iostream>
#include <string>
#include <CL/cl.h>
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/opencv.hpp"
// Evaluates `call` exactly once as a cl_int status code. If the result is not
// CL_SUCCESS, logs the file, line and numeric code to std::cerr and returns from
// the enclosing function, so it can only be used inside functions returning void.
//
// The temporary uses a deliberately unusual name: a local called `err` would
// shadow a caller variable of the same name, and CHECK_OPENCL_ERROR(err) would
// then expand to `cl_int err = err;`, i.e. a self-initialization that reads an
// indeterminate value instead of the caller's status code.
#define CHECK_OPENCL_ERROR(call) \
    do { \
        const cl_int check_opencl_error_code_ = (call); \
        if (check_opencl_error_code_ != CL_SUCCESS) { \
            std::cerr << "OpenCL error at " << __FILE__ << ":" << __LINE__ << " - Code: " << check_opencl_error_code_ << std::endl; \
            return; \
        } \
    } while (0)
// OpenCL C source for the two kernels of this example. The text is passed to
// clCreateProgramWithSource and compiled at run time.
//   mat_mul_buf_factor - buffer variant: each work-item loads one uchar4 with
//                        vload4 at element index y * sw + x, multiplies it by
//                        `factor` and stores it at the same index in dst.
//   mat_mul_img_factor - image variant: each work-item reads the texel at its
//                        global id with read_imageui, multiplies it by `factor`
//                        and writes the result to the same position in dst.
// NOTE(review): vload4(idx, src) addresses src + 4 * idx, so the buffer kernel
// consumes four bytes per work-item. Launching it with one work-item per byte of
// a single-channel Mat would presumably read and write past the end of the
// buffers - confirm the intended pixel layout before enabling the buffer path.
const char *kernelSource = R"(
__kernel void mat_mul_buf_factor(__global const uchar* src,
__global uchar* dst,
const int sw,
const uchar factor) {
const int x = get_global_id(0); // x direction
const int y = get_global_id(1); // y direction
// const int w = get_global_size(0); // get cv::Mat cols
// const int h = get_global_size(1); // get cv::Mat rows
int idx = y * sw + x;
uchar4 pixel = vload4(idx, src);
uchar4 result = pixel * factor;
vstore4(result, idx, dst);
}
__kernel void mat_mul_img_factor(__read_only image2d_t src,
__write_only image2d_t dst,
const uchar factor) {
const int2 pos = {get_global_id(0), get_global_id(1)};
uint4 pixel = read_imageui(src, pos);
uint4 result = pixel * factor;
write_imageui(dst, pos, result);
}
)";
/**
 * @brief Multiplies every element of an 8-bit single-channel matrix by 2 using
 *        the image-based OpenCL kernel `mat_mul_img_factor` from `kernelSource`.
 *
 * The function sets up a complete OpenCL pipeline on the first GPU device of the
 * first platform (context, queue, images, program, kernel), runs the kernel with
 * one work-item per pixel and reads the result back into @p _dst.
 *
 * @param _src Input matrix. Must be non-empty and of type CV_8UC1, because the
 *             data is uploaded as a CL_R / CL_UNSIGNED_INT8 image.
 * @param _dst Output matrix; (re)allocated with the size and type of @p _src.
 *
 * On any OpenCL failure the error is logged to std::cerr and the function
 * returns early; every OpenCL object created so far is released. In that case
 * @p _dst is allocated but its contents are not meaningful.
 */
void startup(cv::InputArray& _src, cv::OutputArray& _dst) {
    const cv::Size ssize = _src.size();
    assert(!ssize.empty());

    cv::Mat src = _src.getMat();
    // The image format below is hard-coded to one 8-bit channel per texel, so
    // any other element type would be uploaded with the wrong layout.
    if (src.type() != CV_8UC1) {
        std::cerr << "startup: unsupported matrix type " << src.type()
                  << ", expected CV_8UC1" << std::endl;
        return;
    }

    _dst.create(ssize, src.type());
    cv::Mat dst = _dst.getMat();
    const cl_uchar factor = 2;

    // Owns every OpenCL handle created below and releases them in reverse
    // dependency order on every exit path, including the early returns taken
    // by CHECK_OPENCL_ERROR.
    struct ClHandles {
        cl_context context = nullptr;
        cl_command_queue queue = nullptr;
        cl_mem srcImage = nullptr;
        cl_mem dstImage = nullptr;
        cl_program program = nullptr;
        cl_kernel kernel = nullptr;
        cl_event event = nullptr;

        ~ClHandles() {
            if (event) clReleaseEvent(event);
            if (kernel) clReleaseKernel(kernel);
            if (program) clReleaseProgram(program);
            if (dstImage) clReleaseMemObject(dstImage);
            if (srcImage) clReleaseMemObject(srcImage);
            if (queue) clReleaseCommandQueue(queue);
            if (context) clReleaseContext(context);
        }
    } res;

    // Deliberately not named `err`, so the value is never shadowed by a local
    // of that name inside CHECK_OPENCL_ERROR.
    cl_int status = CL_SUCCESS;

    // Get platform and device information.
    cl_platform_id platform = nullptr;
    CHECK_OPENCL_ERROR(clGetPlatformIDs(1, &platform, nullptr));
    cl_device_id device = nullptr;
    CHECK_OPENCL_ERROR(clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr));

    // Create the OpenCL context.
    res.context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &status);
    CHECK_OPENCL_ERROR(status);

    // Create the command queue.
    res.queue = clCreateCommandQueue(res.context, device, 0, &status);
    CHECK_OPENCL_ERROR(status);

    // Buffer-based alternative (pairs with the kernel "mat_mul_buf_factor"):
    // const size_t bytes = sizeof(uchar) * src.total() * src.channels();
    // res.srcImage = clCreateBuffer(res.context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, bytes, src.data, &status);
    // CHECK_OPENCL_ERROR(status);
    // res.dstImage = clCreateBuffer(res.context, CL_MEM_WRITE_ONLY, bytes, nullptr, &status);
    // CHECK_OPENCL_ERROR(status);

    // Image-based path.
    cl_image_format imageFormat;
    imageFormat.image_channel_order = CL_R;
    imageFormat.image_channel_data_type = CL_UNSIGNED_INT8;

    // Pass the Mat's real row stride so that non-continuous matrices (ROIs)
    // are uploaded correctly instead of being treated as tightly packed.
    res.srcImage = clCreateImage2D(res.context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &imageFormat,
                                   static_cast<size_t>(src.cols), static_cast<size_t>(src.rows),
                                   src.step[0], src.data, &status);
    CHECK_OPENCL_ERROR(status);

    // The destination is write-only on the device, so there is nothing worth
    // uploading from the (uninitialized) host matrix.
    res.dstImage = clCreateImage2D(res.context, CL_MEM_WRITE_ONLY, &imageFormat,
                                   static_cast<size_t>(dst.cols), static_cast<size_t>(dst.rows),
                                   0, nullptr, &status);
    CHECK_OPENCL_ERROR(status);

    // Create the OpenCL program from source code.
    res.program = clCreateProgramWithSource(res.context, 1, &kernelSource, nullptr, &status);
    CHECK_OPENCL_ERROR(status);

    // Build the program; on failure print the compiler log before bailing out.
    const cl_int buildStatus = clBuildProgram(res.program, 1, &device, nullptr, nullptr, nullptr);
    if (buildStatus != CL_SUCCESS) {
        size_t logSize = 0;
        if (clGetProgramBuildInfo(res.program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &logSize) == CL_SUCCESS
            && logSize > 0) {
            std::string log(logSize, '\0');
            if (clGetProgramBuildInfo(res.program, device, CL_PROGRAM_BUILD_LOG, logSize, &log[0], nullptr) == CL_SUCCESS) {
                std::cerr << "OpenCL program build log:\n" << log.c_str();
            }
        }
        CHECK_OPENCL_ERROR(buildStatus);  // reports the error code and returns
    }

    // Create the OpenCL kernel.
    res.kernel = clCreateKernel(res.program, "mat_mul_img_factor", &status);
    CHECK_OPENCL_ERROR(status);

    // Set the kernel arguments: src image, dst image, scalar factor.
    CHECK_OPENCL_ERROR(clSetKernelArg(res.kernel, 0, sizeof(cl_mem), &res.srcImage));
    CHECK_OPENCL_ERROR(clSetKernelArg(res.kernel, 1, sizeof(cl_mem), &res.dstImage));
    // Buffer-based alternative:
    // const int sw = ssize.width;
    // CHECK_OPENCL_ERROR(clSetKernelArg(res.kernel, 2, sizeof(int), &sw));
    // CHECK_OPENCL_ERROR(clSetKernelArg(res.kernel, 3, sizeof(cl_uchar), &factor));
    CHECK_OPENCL_ERROR(clSetKernelArg(res.kernel, 2, sizeof(cl_uchar), &factor));

    // Execute the kernel with one work-item per pixel.
    const size_t globalSize[] = {static_cast<size_t>(dst.cols), static_cast<size_t>(dst.rows)};
    CHECK_OPENCL_ERROR(clEnqueueNDRangeKernel(res.queue, res.kernel, 2, nullptr, globalSize, nullptr,
                                              0, nullptr, &res.event));
    CHECK_OPENCL_ERROR(clFinish(res.queue));

    // Buffer-based alternative:
    // CHECK_OPENCL_ERROR(clEnqueueReadBuffer(res.queue, res.dstImage, CL_TRUE, 0, bytes, dst.data, 0, nullptr, nullptr));

    // Image-based read-back. origin/region always have three components; a 2D
    // image uses z-origin 0 and depth 1. The row pitch is the Mat's stride so
    // that a non-continuous destination is filled correctly.
    const size_t origin[] = {0, 0, 0};
    const size_t region[] = {static_cast<size_t>(dst.cols), static_cast<size_t>(dst.rows), 1};
    CHECK_OPENCL_ERROR(clEnqueueReadImage(res.queue, res.dstImage, CL_TRUE, origin, region,
                                          dst.step[0], 0, dst.data, 0, nullptr, nullptr));

    // Check whether the kernel executed successfully.
    cl_int execStatus = CL_QUEUED;
    CHECK_OPENCL_ERROR(clGetEventInfo(res.event, CL_EVENT_COMMAND_EXECUTION_STATUS,
                                      sizeof(execStatus), &execStatus, nullptr));
    if (execStatus == CL_COMPLETE) {
        std::cout << "Kernel execution completed successfully." << std::endl;
    } else {
        std::cerr << "Kernel execution failed." << std::endl;
    }
    // All OpenCL objects are released by ~ClHandles().
}
int main() {
cv::Mat mat_16S(8, 8, CV_8UC1);
// 随机生成值
cv::RNG rnger(cv::getTickCount());
rnger.fill(mat_16S, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(10));
cv::Mat dst;
std::cout << "mat_16S:\n" << mat_16S << std::endl;
startup(mat_16S, dst);
std::cout << "dst:\n" << dst << std::endl;
return 0;
}
总结
最后附上一个供大家学习 OpenCL API 的网站:这是官方网站,可以在上面查询各个 API 的调用方法。