接上一篇文章,除了利用opencl-buffer灰度化图像以外,还可以使用image2D进行操作。
本文利用opencv读取一张jpg图像,将数据传入opencl-image2D中,使用kernel快速灰度化图像,并且保存输出图像到本地。
首先是读取和保存图像的接口:
1.data_io.h
#pragma once
// data_io.h - OpenCV-backed image input/output helpers for the OpenCL demo.

#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
// Platform-specific OpenCL header location. __APPLE__ is the macro that
// compilers predefine on macOS; a bare APPLE is never defined automatically,
// so testing it would always select the <CL/cl.h> branch.
#ifdef __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

// Reads `filename` with OpenCV and creates a read-only CL_RGBA / CL_UNORM_INT8
// 2D image object from it in imageObjects[0], using `context`.
// On return `h` holds the image row count and `w` the column count.
// Returns the OpenCL status code of the image creation.
cl_int Load_image2D_by_opencv(const char * filename,cl_mem *imageObjects,cl_context context,int &h,int &w);

// Saves the pixel buffer `rd`, interpreted with the dimensions of the image
// most recently loaded by Load_image2D_by_opencv(), to
// ../data/des_img/00grayImage.jpg.
void Show_pic_by_opencv(unsigned char *rd);
2.data_io.cpp
#include "data_io.h"
// srcImage: the picture as decoded by OpenCV.
// dst_Image: host-side destination image; its dimensions are reused by
// Show_pic_by_opencv() when the result is written to disk.
cv::Mat srcImage,dst_Image;

// Loads an image file and uploads it into a new OpenCL 2D image.
//
// filename     - path of the picture to read.
// imageObjects - array whose element [0] receives the created image object.
// context      - OpenCL context that owns the new image.
// img_h, img_w - receive the picture height (rows) and width (cols);
//                both are set to 0 when the file cannot be read.
//
// Returns CL_SUCCESS on success, CL_INVALID_VALUE when the file name is null
// or the picture cannot be decoded, otherwise the error reported by
// clCreateImage2D.
cl_int Load_image2D_by_opencv(const char * filename,cl_mem *imageObjects,cl_context context,int &img_h,int &img_w)
{
    img_h = 0;
    img_w = 0;
    if (filename == NULL)
    {
        std::cerr << "Load_image2D_by_opencv: no file name given." << std::endl;
        return CL_INVALID_VALUE;
    }

    // 3. Load the image. With its default flag imread() returns an 8-bit,
    //    3-channel BGR matrix, or an empty matrix on failure.
    srcImage = cv::imread(filename);
    if (srcImage.empty())
    {
        std::cerr << "Failed to read image: " << filename << std::endl;
        return CL_INVALID_VALUE;
    }
    img_h = srcImage.rows;
    img_w = srcImage.cols;
    dst_Image = cv::Mat(srcImage.size(), srcImage.type());

    // The OpenCL image below uses 4 bytes per pixel in R,G,B,A order, so the
    // packed 3-byte BGR data must be expanded and reordered first; copying
    // the raw bytes would misalign every pixel. cvtColor() allocates a fresh,
    // continuous matrix, so a row pitch of 0 (tightly packed) is correct.
    cv::Mat rgba;
    cv::cvtColor(srcImage, rgba, cv::COLOR_BGR2RGBA);

    // Describe the pixel layout of the OpenCL image object.
    cl_image_format clImageFormat;
    clImageFormat.image_channel_order = CL_RGBA;
    clImageFormat.image_channel_data_type = CL_UNORM_INT8;

    // CL_MEM_COPY_HOST_PTR copies the pixels during creation, so `rgba` may
    // be released as soon as this function returns (no leaked host buffer).
    cl_int errNum = CL_SUCCESS;
    imageObjects[0] = clCreateImage2D(context,
                                      CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                      &clImageFormat,
                                      img_w,
                                      img_h,
                                      0,
                                      rgba.data,
                                      &errNum);
    std::cerr << "L4.2" <<std::endl;
    return errNum;
}
void Show_pic_by_opencv(u_char *rd)
{
dst_Image = cv::Mat(dst_Image.rows, dst_Image.cols, dst_Image.type(), (void *)rd); //h_,w_是图像长宽
cv::imwrite("../data/des_img/00grayImage.jpg", dst_Image);
}
其次,主函数:
3.main.cpp
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#ifdef __APPLE__ // platform-specific OpenCL header location
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include "data_io.h"
// Builds an OpenCL context on the first available platform, preferring a GPU
// device and falling back to a CPU device.
// Returns the context, or NULL when no platform exists or neither device
// type can provide a context.
cl_context CreateContext()
{
    // Pick the first platform reported by the runtime.
    cl_platform_id platform;
    cl_uint platformCount;
    cl_int status = clGetPlatformIDs(1, &platform, &platformCount);
    if (status != CL_SUCCESS || platformCount == 0)
    {
        std::cerr << "Failed to find any OpenCL platforms." << std::endl;
        return NULL;
    }

    // Zero-terminated property list binding the context to that platform.
    cl_context_properties properties[] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };

    // First choice: a GPU context.
    cl_context result = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU,
                                                NULL, NULL, &status);
    if (status == CL_SUCCESS)
    {
        return result;
    }

    // Second choice: a CPU context.
    std::cout << "Could not create GPU context, trying CPU..." << std::endl;
    result = clCreateContextFromType(properties, CL_DEVICE_TYPE_CPU,
                                     NULL, NULL, &status);
    if (status == CL_SUCCESS)
    {
        return result;
    }

    std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
    return NULL;
}
// Creates a command queue on the first device of `context`.
//
// context - context whose device list is queried.
// device  - if non-NULL, receives the id of the device the queue was
//           created for.
//
// Returns the new command queue, or NULL on any failure.
cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)
{
    // Ask how many bytes the device list occupies.
    size_t deviceBufferSize = 0;
    cl_int errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed call to clGetContextInfo(...,CL_CONTEXT_DEVICES,...)";
        return NULL;
    }

    // Reject a list too small to hold even one id; otherwise devices[0]
    // below would be read out of bounds.
    const size_t deviceCount = deviceBufferSize / sizeof(cl_device_id);
    if (deviceCount == 0)
    {
        std::cerr << "No devices available.";
        return NULL;
    }

    // A vector releases the storage on every return path, including the
    // error returns below.
    std::vector<cl_device_id> devices(deviceCount);
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                              deviceCount * sizeof(cl_device_id), devices.data(), NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to get device IDs";
        return NULL;
    }

    // Use the first device and create its command queue.
    cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, NULL);
    if (commandQueue == NULL)
    {
        std::cerr << "Failed to create commandQueue for device 0";
        return NULL;
    }

    if (device != NULL)
    {
        *device = devices[0];
    }
    return commandQueue;
}
// Creates and builds an OpenCL program from a source file.
//
// context  - context the program is created in.
// device   - device whose build log is printed when the build fails.
// fileName - path of the OpenCL C source file.
//
// Returns the built program, or NULL when the file cannot be opened, the
// program object cannot be created, or the build fails (the build log is
// then written to std::cerr).
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
        return NULL;
    }

    // Read the whole file into memory.
    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    const std::string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();

    cl_program program = clCreateProgramWithSource(context, 1,
                                                   &srcStr,
                                                   NULL, NULL);
    if (program == NULL)
    {
        std::cerr << "Failed to create CL program from source." << std::endl;
        return NULL;
    }

    const cl_int errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        // Query the log length first so that long logs are not truncated and
        // nothing uninitialized is printed if the query itself fails.
        std::string buildLog;
        size_t logSize = 0;
        if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                  0, NULL, &logSize) == CL_SUCCESS && logSize > 0)
        {
            buildLog.resize(logSize);
            if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                      logSize, &buildLog[0], NULL) != CL_SUCCESS)
            {
                buildLog.clear();
            }
        }
        std::cerr << "Error in kernel: " << std::endl;
        // c_str() stops at the log's own terminating NUL.
        std::cerr << buildLog.c_str();
        clReleaseProgram(program);
        return NULL;
    }
    return program;
}
// Rounds `globalSize` up to the nearest multiple of `groupSize`.
//
// groupSize  - work-group size; when it is not positive no rounding is
//              possible and `globalSize` is returned unchanged (this avoids
//              a division by zero).
// globalSize - number of work-items required; values <= 0 yield 0 instead
//              of wrapping around to a huge unsigned value.
//
// Returns the smallest multiple of `groupSize` that is >= `globalSize`.
size_t RoundUp(int groupSize, int globalSize)
{
    if (globalSize <= 0)
    {
        return 0;
    }
    const size_t global = static_cast<size_t>(globalSize);
    if (groupSize <= 0)
    {
        return global;
    }
    // Work in size_t so the addition cannot overflow int.
    const size_t group = static_cast<size_t>(groupSize);
    const size_t remainder = global % group;
    return remainder == 0 ? global : global + (group - remainder);
}
// Releases every OpenCL object that was actually created; handles that are
// still zero are skipped. Release order: both image objects, command queue,
// kernel, program, sampler, context.
void Cleanup(cl_context context, cl_command_queue commandQueue,
             cl_program program, cl_kernel kernel, cl_mem imageObjects[2],
             cl_sampler sampler)
{
    int slot = 0;
    while (slot < 2)
    {
        cl_mem image = imageObjects[slot];
        ++slot;
        if (image == 0)
        {
            continue;
        }
        clReleaseMemObject(image);
    }

    if (commandQueue != 0)
    {
        clReleaseCommandQueue(commandQueue);
    }

    if (kernel != 0)
    {
        clReleaseKernel(kernel);
    }

    if (program != 0)
    {
        clReleaseProgram(program);
    }

    if (sampler != 0)
    {
        clReleaseSampler(sampler);
    }

    if (context != 0)
    {
        clReleaseContext(context);
    }
}
// Demo entry point: loads ../data/src_img/test.jpg into an OpenCL 2D image,
// runs the "kernel_gray" kernel from gray.cl on it, reads the result back
// and hands it to Show_pic_by_opencv() for saving.
// Returns 0 on success and 1 on any failure.
int main()
{
    // Handles start out as 0 so that Cleanup() can be called at any stage.
    cl_int errNum;
    cl_device_id device = 0;
    cl_context context = 0;
    cl_program program = 0;
    cl_kernel kernel = 0;
    cl_command_queue commandQueue = 0;
    cl_sampler sampler = 0;
    cl_mem imageObjects[2] = { 0, 0 };
    int img_h = 0;
    int img_w = 0;

    // 1. Select a platform and create the context.
    context = CreateContext();
    if (context == NULL)
    {
        std::cerr << "Failed to create OpenCL context." << std::endl;
        return 1;
    }

    // 2. Create the command queue.
    commandQueue = CreateCommandQueue(context, &device);
    if (commandQueue == NULL)
    {
        std::cerr <<"CreateCommandQueue failed"<<std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        return 1;
    }

    // 3. Make sure the device supports image objects at all.
    cl_bool imageSupport = CL_FALSE;
    clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool),
                    &imageSupport, NULL);
    if (imageSupport != CL_TRUE)
    {
        std::cerr << "OpenCL device does not support images." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        return 1;
    }

    // 4. Prepare the input data.
    std::cerr << "L4." << std::endl;
    errNum=Load_image2D_by_opencv("../data/src_img/test.jpg",imageObjects,context,img_h,img_w);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Load_image failed." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        return 1;
    }

    // 5. Create the output image object (same size and format as the input).
    cl_image_format clImageFormat;
    clImageFormat.image_channel_order = CL_RGBA;
    clImageFormat.image_channel_data_type = CL_UNORM_INT8;
    imageObjects[1] = clCreateImage2D(context,
                                      CL_MEM_WRITE_ONLY,
                                      &clImageFormat,
                                      img_w,
                                      img_h,
                                      0,
                                      NULL,
                                      &errNum);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error creating CL output image object." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        // Same exit status as every other failure path.
        return 1;
    }

    // Create the sampler object.
    sampler = clCreateSampler(context,
                              CL_FALSE, // non-normalized coordinates
                              CL_ADDRESS_CLAMP_TO_EDGE,
                              CL_FILTER_NEAREST,
                              &errNum);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error creating CL sampler object." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        return 1;
    }

    // 6. Create the OpenCL program object.
    program = CreateProgram(context, device, "gray.cl");
    if (program == NULL)
    {
        std::cerr <<"CreateProgram failed"<<std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        return 1;
    }

    // 7. Create the OpenCL kernel.
    kernel = clCreateKernel(program, "kernel_gray", NULL);
    if (kernel == NULL)
    {
        std::cerr << "Failed to create kernel" << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        return 1;
    }

    // 8. Set the kernel arguments.
    errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imageObjects[0]);
    errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &imageObjects[1]);
    errNum |= clSetKernelArg(kernel, 2, sizeof(cl_sampler), &sampler);
    errNum |= clSetKernelArg(kernel, 3, sizeof(cl_int), &img_w);
    errNum |= clSetKernelArg(kernel, 4, sizeof(cl_int), &img_h);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error setting kernel arguments." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        return 1;
    }

    // The global size must be a multiple of the local size, so it is rounded
    // up; the kernel receives width/height to skip the padding work-items.
    size_t localWorkSize[2] = { 32, 4 };
    size_t globalWorkSize[2] = { RoundUp(static_cast<int>(localWorkSize[0]), img_w),
                                 RoundUp(static_cast<int>(localWorkSize[1]), img_h) };

    // 9. Launch the kernel. `evt` is only valid if the enqueue succeeded, so
    //    the result must be checked before waiting on it.
    cl_event evt;
    errNum = clEnqueueNDRangeKernel(commandQueue, kernel,
                                    2, 0, globalWorkSize, localWorkSize,
                                    0, NULL, &evt);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error queuing kernel for execution." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        return 1;
    }
    errNum = clWaitForEvents(1, &evt); // block until the kernel has finished
    clReleaseEvent(evt);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error waiting for kernel execution." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        return 1;
    }

    // 10. Read the result back: 4 bytes per pixel (CL_RGBA, CL_UNORM_INT8).
    //     The vector frees the buffer on every return path.
    std::vector<unsigned char> read_data(static_cast<size_t>(img_w) * static_cast<size_t>(img_h) * 4);
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3] = { size_t(img_w), size_t(img_h), 1};
    errNum = clEnqueueReadImage(commandQueue, imageObjects[1], CL_TRUE,
                                origin, region, 0, 0, read_data.data(),
                                0, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error reading result buffer." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
        return 1;
    }

    // Save the resulting image.
    Show_pic_by_opencv(read_data.data());
    Cleanup(context, commandQueue, program, kernel, imageObjects, sampler);
    std::cout<<"hello world"<<std::endl;
    return 0;
}
最后,是实际对图像进行灰度化的kernel函数:
4.gray.cl
// Converts one pixel of srcImg to gray and writes it to dstImg.
//
// srcImg  - input image, read through `sampler` with integer coordinates.
// dstImg  - output image; receives (gray, gray, gray, source alpha).
// sampler - sampler used for reading srcImg.
// width, height - real image size in pixels. The host rounds the global
//                 work size up to a multiple of the work-group size, so
//                 work-items outside this range must do nothing.
__kernel void kernel_gray(__read_only image2d_t srcImg,__write_only image2d_t dstImg,
                          sampler_t sampler,
                          int width, int height)
{
    const int2 coord = (int2)(get_global_id(0), get_global_id(1));
    if (coord.x >= width || coord.y >= height)
    {
        return;
    }

    // Read the texel once instead of once per channel.
    const float4 pixel = read_imagef(srcImg, sampler, coord);

    // Weighted sum of the first three channels. The result is assigned
    // directly, so no uninitialized accumulator is involved.
    const float gray = pixel.x * 0.2989f + pixel.y * 0.5870f + pixel.z * 0.1140f;

    write_imagef(dstImg, coord, (float4)(gray, gray, gray, pixel.w));
}
5.Makefile
# Builds the demo executable from both translation units. data_io.h is listed
# as a prerequisite so that editing the header triggers a rebuild.
# The recipe line must start with a TAB character.
demo : main.cpp data_io.cpp data_io.h
	g++ `pkg-config opencv4 --cflags` main.cpp data_io.cpp -o demo `pkg-config opencv4 --libs` -D CL_TARGET_OPENCL_VERSION=100 -lOpenCL