raw文件中保存的是NV12格式的数据，需要以二进制方式读取；数据经过GPU处理（YUV转RGB并缩放）后，再将输出结果保存为jpg文件。
其中resize使用双线性插值法。
首先是读取和保存图像的接口：
1.data_io.h
#pragma once
#ifdef __APPLE__ // platform-specific location of the OpenCL header
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include <string>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>

// Reads one NV12 frame (img_w * img_h * 3 / 2 bytes) from `filename` and
// uploads it into a read-only OpenCL buffer stored in imageObjects[0].
// Returns 1 when the file cannot be read, otherwise the error code reported
// by clCreateBuffer (CL_SUCCESS on success).
cl_int Load_image_from_NV12(const std::string &filename,cl_mem *imageObjects,cl_context context,int &img_h,int &img_w);

// Treats `rd` as an img_h x img_w image with three 8-bit channels in RGB
// order, converts it to BGR and writes it to `filename` with cv::imwrite.
void Save_pic_by_opencv(const std::string &filename,unsigned char *rd,int &img_h,int &img_w);
2.data_io.cpp
#include "data_io.h"
#include <iostream>
#include <fstream>
#include <sstream>
// Reads the first NV12 frame (width * height * 3 / 2 bytes) of `filePath`
// into a freshly allocated buffer.
//
// Returns NULL (after logging to std::cerr) when the dimensions are not
// positive, the file cannot be opened, or it holds fewer bytes than one frame.
// On success the caller owns the buffer; it is allocated with `new char[]`,
// so it must be released with `delete[]` through a `char *`.
u_char * get_NV12_buffer(const std::string &filePath, int width, int height)
{
    if (width <= 0 || height <= 0) {
        std::cerr << "Error: Invalid NV12 dimensions " << width << "x" << height << std::endl;
        return NULL;
    }

    // Open at the end so tellg() reports the file size.
    std::ifstream file(filePath, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
        std::cerr << "Error: Unable to open NV12 file " << filePath << std::endl;
        return NULL;
    }

    const std::streamsize fileSize = file.tellg();
    // Widen before multiplying so large frames cannot overflow int.
    const std::streamsize frameSize = static_cast<std::streamsize>(width) * height * 3 / 2;
    if (fileSize < frameSize) {
        // Reject short files before allocating anything.
        std::cerr << "Error: Unable to read NV12 data from file " << filePath << std::endl;
        return NULL;
    }

    file.seekg(0, std::ios::beg);
    char *buffer = new char[frameSize];
    if (!file.read(buffer, frameSize)) {
        std::cerr << "Error: Unable to read NV12 data from file " << filePath << std::endl;
        delete[] buffer; // do not leak the frame buffer on a failed read
        return NULL;
    }
    return reinterpret_cast<u_char *>(buffer);
}
// Loads one NV12 frame of img_w x img_h pixels from `filename` and copies it
// into a new read-only OpenCL buffer stored in imageObjects[0].
//
// Returns 1 (not an OpenCL error code) when the file cannot be read;
// otherwise returns the error code reported by clCreateBuffer.
// img_h and img_w are only read here.
cl_int Load_image_from_NV12(const std::string &filename,cl_mem *imageObjects,cl_context context,int &img_h,int &img_w)
{
    u_char *nv12data = get_NV12_buffer(filename, img_w, img_h);
    if (NULL == nv12data)
        return 1;

    // Y plane (w*h bytes) plus the half-size chroma data: w*h*3/2 bytes total.
    const size_t nv12_size = static_cast<size_t>(img_w) * img_h * 3 / 2;

    cl_int errNum = CL_SUCCESS;
    imageObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                     nv12_size, nv12data, &errNum);

    // CL_MEM_COPY_HOST_PTR copies the host data during creation, so the
    // staging buffer can be freed right away. get_NV12_buffer allocates it
    // with new char[], hence the cast back before delete[].
    delete[] reinterpret_cast<char *>(nv12data);
    return errNum;
}
void Save_pic_by_opencv(const std::string &filename,unsigned char *rd,int &img_h,int &img_w)
{
cv::Mat src_Image =cv::Mat(img_h, img_w, CV_8UC3, (void*)rd);
cv::Mat dst_Image(img_h, img_w, CV_8UC3);
cv::cvtColor(src_Image, dst_Image,cv::COLOR_RGB2BGR);
cv::imwrite(filename, dst_Image);
}
3.main.cpp
#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#ifdef __APPLE__ // platform-specific location of the OpenCL header
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include "data_io.h"
// Creates an OpenCL context on the first reported platform, preferring GPU
// devices and falling back to CPU devices. Returns NULL on failure.
cl_context CreateContext()
{
    cl_platform_id platform;
    cl_uint platformCount;

    // Only the first platform is queried.
    cl_int status = clGetPlatformIDs(1, &platform, &platformCount);
    if (status != CL_SUCCESS || platformCount <= 0)
    {
        std::cerr << "Failed to find any OpenCL platforms." << std::endl;
        return NULL;
    }

    // Zero-terminated property list binding the context to that platform.
    cl_context_properties props[] =
    {
        CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0
    };

    // First choice: a context built from the platform's GPU devices.
    cl_context ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU,
                                             NULL, NULL, &status);
    if (status == CL_SUCCESS)
        return ctx;

    // Second choice: a context built from the platform's CPU devices.
    std::cout << "Could not create GPU context, trying CPU..." << std::endl;
    ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_CPU,
                                  NULL, NULL, &status);
    if (status == CL_SUCCESS)
        return ctx;

    std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
    return NULL;
}
// Creates a profiling-enabled command queue on the first device of `context`.
//
// On success the chosen device is written to *device (when `device` is not
// NULL) and the new queue is returned. On failure a message is printed to
// std::cerr and NULL is returned.
cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)
{
    // Ask for the size in bytes of the context's device list.
    size_t deviceBufferSize = 0;
    cl_int errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed call to clGetContextInfo(...,CL_CONTEXT_DEVICES,...)" << std::endl;
        return NULL;
    }

    const size_t deviceCount = deviceBufferSize / sizeof(cl_device_id);
    if (deviceCount == 0)
    {
        std::cerr << "No devices available." << std::endl;
        return NULL;
    }

    // The vector owns the list, so no error path below can leak it.
    std::vector<cl_device_id> devices(deviceCount);
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                              deviceCount * sizeof(cl_device_id), &devices[0], NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to get device IDs" << std::endl;
        return NULL;
    }

    // Use the first device; profiling is enabled so callers can time kernels.
    cl_command_queue commandQueue =
        clCreateCommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE, &errNum);
    if (commandQueue == NULL)
    {
        std::cerr << "Failed to create commandQueue for device 0"
                  << " (error " << errNum << ")" << std::endl;
        return NULL;
    }

    if (device != NULL)
        *device = devices[0];
    return commandQueue;
}
// Loads OpenCL C source from `fileName`, creates a program object in
// `context` and builds it.
//
// Returns the built program, or NULL after printing a diagnostic to std::cerr.
// When the build fails, the build log for `device` is printed in full.
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
    if (fileName == NULL)
    {
        std::cerr << "Failed to open file for reading: (null)" << std::endl;
        return NULL;
    }

    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
        return NULL;
    }

    // Read the whole file into one string.
    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    const std::string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();

    cl_int errNum = CL_SUCCESS;
    cl_program program = clCreateProgramWithSource(context, 1, &srcStr, NULL, &errNum);
    if (program == NULL)
    {
        std::cerr << "Failed to create CL program from source."
                  << " errNum=" << errNum << std::endl;
        return NULL;
    }

    errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        // Query the log length first. A fixed-size buffer smaller than the
        // log makes clGetProgramBuildInfo fail without writing anything.
        size_t logSize = 0;
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                              0, NULL, &logSize);
        std::string buildLog(logSize, '\0');
        if (logSize > 0)
        {
            clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                  logSize, &buildLog[0], NULL);
        }
        std::cerr << "Error in kernel: " << std::endl;
        // c_str() stops at the terminating NUL contained in the log.
        std::cerr << buildLog.c_str() << std::endl;
        clReleaseProgram(program);
        return NULL;
    }
    return program;
}
// Rounds `globalSize` up to the nearest multiple of `groupSize`.
//
// Returns 0 when `globalSize` is not positive. Returns `globalSize` unchanged
// when `groupSize` is not positive, so a bad group size can never divide by
// zero.
size_t RoundUp(int groupSize, int globalSize)
{
    if (globalSize <= 0)
        return 0;
    if (groupSize <= 0)
        return static_cast<size_t>(globalSize);

    // Work in size_t so values close to INT_MAX cannot overflow.
    const size_t group = static_cast<size_t>(groupSize);
    const size_t global = static_cast<size_t>(globalSize);
    const size_t remainder = global % group;
    if (remainder == 0)
        return global;
    return global + (group - remainder);
}
// Releases every non-null OpenCL handle passed in: the two memory objects
// first, then the command queue, kernel, program and context.
void Cleanup(cl_context context, cl_command_queue commandQueue,
             cl_program program, cl_kernel kernel, cl_mem imageObjects[2])
{
    // Walk both buffer slots and skip the ones that were never created.
    for (cl_mem *slot = imageObjects; slot != imageObjects + 2; ++slot)
    {
        if (*slot == 0)
            continue;
        clReleaseMemObject(*slot);
    }

    if (commandQueue != 0) { clReleaseCommandQueue(commandQueue); }
    if (kernel != 0)       { clReleaseKernel(kernel); }
    if (program != 0)      { clReleaseProgram(program); }
    if (context != 0)      { clReleaseContext(context); }
}
int main()
{
//initial varibles
cl_int errNum=0;
cl_device_id device = 0;
cl_context context = 0;
cl_program program = 0;
cl_kernel kernel = 0;
cl_command_queue commandQueue = 0;
cl_sampler sampler = 0;
cl_mem imageObjects[2] = { 0, 0 };
int img_w = 3840;
int img_h = 2176;
size_t read_size=img_w*img_h*3;
size_t g_w=0;
size_t g_h=0;
std::string input_file,output_jpgfile;
char *kernel_file=new char [100];
char *kernel_name=new char [100];
cl_float scale_w=0.5;
cl_float scale_h=0.5;
int new_w=0;
int new_h=0;
input_file="../data/test.raw";
output_jpgfile="../data/05yuv2rgb_resize.jpg";
strcpy(kernel_file,"yuv2rgb_resize.cl");
strcpy(kernel_name,"kernel_yuv2rgb_resize");
// 1.选择platform,创建contex上下文
context = CreateContext();
if (context == NULL)
{
std::cerr << "Failed to create OpenCL context." << std::endl;
return 1;
}
// 2.创建命令队列
commandQueue = CreateCommandQueue(context, &device);
if (commandQueue == NULL)
{
std::cerr <<"CreateCommandQueue failed"<<std::endl;
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 1;
}
// 3. 确保设备支持这种图像格式
cl_bool imageSupport = CL_FALSE;
clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool),
&imageSupport, NULL);
if (imageSupport != CL_TRUE)
{
std::cerr << "OpenCL device does not support images." << std::endl;
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 1;
}
//4.准备输入数据
errNum=Load_image_from_NV12(input_file,imageObjects,context,img_h,img_w);
if (errNum != CL_SUCCESS)
{
std::cerr << "Load_image failed." << std::endl;
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 1;
}
// 5.创建输出的图像对象
new_w=scale_w*img_w;
new_h=scale_h*img_h;
read_size=sizeof(u_char)*3*new_w*new_h;
imageObjects[1] = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
read_size, NULL, &errNum);
if (errNum != CL_SUCCESS)
{
std::cerr << "create output imageobject failed." << std::endl;
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 1;
}
//6.创建OpenCL-program对象
program = CreateProgram(context, device, kernel_file);
if (program == NULL)
{
std::cerr <<"CreateProgram failed"<<std::endl;
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 1;
}
// 7.创建OpenCL核
kernel = clCreateKernel(program, kernel_name, NULL);
if (kernel == NULL)
{
std::cerr << "Failed to create kernel" << std::endl;
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 1;
}
//8. 设定参数
errNum = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imageObjects[0]);
errNum |= clSetKernelArg(kernel, 1, sizeof(cl_int), &img_w);
errNum |= clSetKernelArg(kernel, 2, sizeof(cl_int), &img_h);
errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &imageObjects[1]);
errNum |= clSetKernelArg(kernel, 4, sizeof(cl_float), &scale_w);
errNum |= clSetKernelArg(kernel, 5, sizeof(cl_float), &scale_h);
if (errNum != CL_SUCCESS)
{
std::cerr << "Error setting kernel arguments." << std::endl;
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 1;
}
g_w=new_w;
g_h=new_h;
size_t localWorkSize[2] = { 16, 16 };
size_t globalWorkSize[2] = { RoundUp(localWorkSize[0], g_w),
RoundUp(localWorkSize[1], g_h) };
//9.启动内核,内核执行完成后,会将evt置为CL_SUCCESS/CL_COMPLETE
cl_event evt;
errNum = clEnqueueNDRangeKernel(commandQueue, kernel,
2, 0, globalWorkSize, localWorkSize,
0, NULL, &evt);
if (errNum != CL_SUCCESS)
{
std::cerr << "Error clEnqueueNDRangeKernel errNum=." <<errNum<< std::endl;
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 1;
}
clFinish(commandQueue);
clWaitForEvents(1, &evt); //等待命令事件发生
cl_ulong time_start= (cl_ulong)0;
cl_ulong time_end= (cl_ulong)0;
double total_time;
clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL);
clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL);
clReleaseEvent(evt);
total_time =(double)( time_end - time_start);
printf("\nExecution time in milliseconds = %f ms\n", (total_time / 1000000.0) );
//10.读回数据
u_char *read_data = new u_char [read_size];
errNum =clEnqueueReadBuffer(commandQueue, imageObjects[1],
CL_TRUE,
0,
read_size,
read_data, 0, NULL, NULL);
if (errNum != CL_SUCCESS)
{
std::cerr << "Error reading result buffer enume." <<errNum<< std::endl;
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 1;
}
if (read_data == NULL)
{
std::cerr << "Error reading result buffer null." <<errNum<< std::endl;
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 1;
}
//显示图像
Save_pic_by_opencv(output_jpgfile,read_data,new_h,new_w);
Cleanup(context, commandQueue, program, kernel, imageObjects);
return 0;
}
最后,GPU处理的kernel函数
4.yuv2rgb_resize.cl
// Fetches one pixel of an NV12 frame and converts it to RGB.
//
// y_plane  : width*height luma bytes.
// uv_plane : interleaved U,V bytes, one pair per 2x2 luma block, using the
//            same row stride as the luma plane (assumes an even width).
// Coordinates outside the frame are clamped to the nearest edge pixel, so
// border taps replicate the edge instead of contributing black.
// Each channel of the result is limited to [0, 255].
float3 nv12_to_rgb(const __global uchar *y_plane,
                   const __global uchar *uv_plane,
                   int x, int y, int width, int height)
{
    x = min(max(x, 0), width - 1);
    y = min(max(y, 0), height - 1);

    const float yy = (float)y_plane[y * width + x];
    const int uv_index = (y / 2) * width + (x / 2) * 2;
    const float u = (float)uv_plane[uv_index] - 128.0f;
    const float v = (float)uv_plane[uv_index + 1] - 128.0f;

    // Single-precision constants: no fp64 support is required.
    const float3 rgb = (float3)(yy + 1.402f * v,
                                yy - 0.34413f * u - 0.71414f * v,
                                yy + 1.772f * u);
    // Saturate here; a plain float -> uchar conversion would wrap around.
    return clamp(rgb, 0.0f, 255.0f);
}

// Converts an NV12 frame to interleaved 8-bit RGB while resizing it with
// bilinear interpolation. One work-item produces one output pixel.
//
// vinput           : NV12 input, input_w * input_h * 3 / 2 bytes.
// input_w, input_h : input size in pixels.
// vout             : output, 3 bytes per pixel, row stride out_w pixels.
// scale_x, scale_y : output size / input size along each axis.
__kernel void kernel_yuv2rgb_resize(const __global uchar *vinput,
                                    int input_w, int input_h,
                                    __global uchar *vout,
                                    float scale_x, float scale_y)
{
    const int w = get_global_id(0);
    const int h = get_global_id(1);

    // Same truncating computation the host uses to size the output buffer.
    const int out_w = (int)(input_w * scale_x);
    const int out_h = (int)(input_h * scale_y);

    // The global size is rounded up to whole work-groups, so the extra
    // work-items must not write past the end of the output buffer.
    if (w >= out_w || h >= out_h)
        return;

    const __global uchar *y_plane = vinput;
    const __global uchar *uv_plane = vinput + input_w * input_h;

    // Position of this output pixel in source coordinates.
    const float src_x = (float)w / scale_x;
    const float src_y = (float)h / scale_y;

    // Top-left corner of the 2x2 neighbourhood (rounded toward -infinity).
    const int x1 = convert_int_rtn(src_x);
    const int y1 = convert_int_rtn(src_y);
    const int x2 = x1 + 1;
    const int y2 = y1 + 1;

    // Fractional offsets inside that neighbourhood.
    const float fx = src_x - (float)x1;
    const float fy = src_y - (float)y1;

    // Bilinear blend of the four neighbouring source pixels.
    const float3 rgb =
        nv12_to_rgb(y_plane, uv_plane, x1, y1, input_w, input_h) * ((1.0f - fx) * (1.0f - fy)) +
        nv12_to_rgb(y_plane, uv_plane, x2, y1, input_w, input_h) * (fx * (1.0f - fy)) +
        nv12_to_rgb(y_plane, uv_plane, x1, y2, input_w, input_h) * ((1.0f - fx) * fy) +
        nv12_to_rgb(y_plane, uv_plane, x2, y2, input_w, input_h) * (fx * fy);

    // vstore3 writes 3 bytes at vout + 3 * (h * out_w + w).
    vstore3(convert_uchar3_sat(rgb), mad24(h, out_w, w), vout);
}