[OpenCL] 读取一张raw图像(NV12格式),转rgb格式后remap并resize(双线性插值)操作,输出jpg图像,并统计GPU-kernel耗时。

raw文件中存放的是NV12格式的数据,需要以二进制方式读取;经GPU处理后,再将输出数据保存为jpg文件。

其中resize使用双线性插值法。

首先读取和保存图像接口:

1.data_io.h

 
#include <string>

// Platform-specific OpenCL header location. Compilers targeting macOS
// predefine __APPLE__; the bare APPLE spelling is still honoured for builds
// that pass -DAPPLE explicitly.
#if defined(__APPLE__) || defined(APPLE)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
 
 
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
 
// Reads one NV12 frame of img_w x img_h pixels from `filename` and uploads it
// into a read-only OpenCL buffer stored in imageObjects[0].
//
// filename:     path of the raw NV12 file.
// imageObjects: output array; only element 0 is written.
// context:      OpenCL context that owns the new buffer.
// img_h, img_w: frame height and width in pixels (read only by the
//               implementation, despite being passed by reference).
//
// Returns the status reported by clCreateBuffer, or 1 when the file could not
// be read. Callers should compare the result against CL_SUCCESS.
cl_int Load_image_from_NV12(const std::string &filename,cl_mem *imageObjects,cl_context context,int &img_h,int &img_w);
 
// Interprets `rd` as an interleaved 8-bit RGB image of img_h rows by img_w
// columns, converts it to OpenCV's BGR channel order and writes it to
// `filename` with cv::imwrite.
void Save_pic_by_opencv(const std::string &filename,unsigned char  *rd,int &img_h,int &img_w);

2.data_io.cpp

#include "data_io.h"
 
#include <iostream>
#include <fstream>
#include <sstream>
 
// Reads one NV12 frame from a raw binary file into a freshly allocated buffer.
//
// The frame size is width*height*3/2 bytes: the full-resolution luma plane
// followed by the half-height interleaved chroma plane.
//
// filePath: path of the raw file.
// width:    frame width in pixels, must be positive.
// height:   frame height in pixels, must be positive.
//
// Returns a buffer of width*height*3/2 bytes allocated with `new char[]`; the
// caller owns it and must release it with delete[]. Returns NULL when the
// dimensions are invalid, the file cannot be opened, the file is shorter than
// one frame, or the read fails.
u_char * get_NV12_buffer(const std::string &filePath, int width, int height) 
{
    if (width <= 0 || height <= 0) {
        std::cerr << "Error: Invalid NV12 dimensions " << width << "x" << height << std::endl;
        return NULL;
    }

    // Opened positioned at the end (ios::ate) so tellg() yields the file length.
    std::ifstream file(filePath, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
        std::cerr << "Error: Unable to open NV12 file " << filePath << std::endl;
        return NULL;
    }

    const std::streamsize fileSize = file.tellg();
    file.seekg(0, std::ios::beg);

    // Computed in streamsize rather than int so large frames cannot overflow.
    const std::streamsize frameSize =
        static_cast<std::streamsize>(width) * height * 3 / 2;

    // Reject short files before allocating anything.
    if (fileSize < frameSize) {
        std::cerr << "Error: Unable to read NV12 data from file " << filePath << std::endl;
        return NULL;
    }

    char *buffer = new char[static_cast<size_t>(frameSize)];
    if (!file.read(buffer, frameSize)) {
        std::cerr << "Error: Unable to read NV12 data from file " << filePath << std::endl;
        delete [] buffer;   // do not leak the frame buffer on a failed read
        return NULL;
    }

    return reinterpret_cast<u_char *>(buffer);
}
 
// Loads one NV12 frame from `filename` and uploads it into a read-only OpenCL
// buffer stored in imageObjects[0].
//
// filename:     path of the raw NV12 file.
// imageObjects: output array; only element 0 is written.
// context:      OpenCL context that owns the new buffer.
// img_h, img_w: frame height and width in pixels (not modified here).
//
// Returns the status reported by clCreateBuffer, CL_INVALID_VALUE when
// imageObjects is NULL, or 1 when the file could not be read (kept for
// compatibility with callers that only test against CL_SUCCESS).
cl_int Load_image_from_NV12(const std::string &filename,cl_mem *imageObjects,cl_context context,int &img_h,int &img_w)
{
    if (NULL == imageObjects)
        return CL_INVALID_VALUE;

    u_char *nv12data = get_NV12_buffer(filename, img_w, img_h);
    if (NULL == nv12data)
        return 1;

    // Luma plane plus half-height interleaved chroma plane.
    const size_t nv12_size = sizeof(u_char) * img_h * img_w * 3 / 2;

    cl_int errNum = CL_SUCCESS;
    imageObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                     nv12_size, nv12data, &errNum);

    // CL_MEM_COPY_HOST_PTR copies the host data while the buffer is created,
    // so the staging buffer is no longer needed whether or not creation
    // succeeded. It was allocated as char[] by get_NV12_buffer.
    delete [] reinterpret_cast<char *>(nv12data);

    return errNum;
}
 
void Save_pic_by_opencv(const std::string &filename,unsigned char  *rd,int &img_h,int &img_w)
{
   cv::Mat src_Image =cv::Mat(img_h, img_w, CV_8UC3, (void*)rd);
   cv::Mat dst_Image(img_h, img_w, CV_8UC3);
   cv::cvtColor(src_Image, dst_Image,cv::COLOR_RGB2BGR);
   cv::imwrite(filename, dst_Image);
}

3.main.cpp

#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>


// Platform-specific OpenCL header location. Compilers targeting macOS
// predefine __APPLE__; the bare APPLE spelling is still honoured for builds
// that pass -DAPPLE explicitly.
#if defined(__APPLE__) || defined(APPLE)
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif

#include "data_io.h"



// Creates an OpenCL context on the first platform returned by the runtime.
// A GPU context is attempted first; if that fails, a CPU context on the same
// platform is attempted. Returns NULL when no platform is found or neither
// context can be created.
cl_context CreateContext()
{
    cl_platform_id platform;
    cl_uint platformCount;

    // Ask for at most one platform id.
    const cl_int queryStatus = clGetPlatformIDs(1, &platform, &platformCount);
    if (queryStatus != CL_SUCCESS || platformCount <= 0)
    {
        std::cerr << "Failed to find any OpenCL platforms." << std::endl;
        return NULL;
    }

    // Zero-terminated property list binding the context to that platform.
    cl_context_properties properties[] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };

    cl_int createStatus;
    cl_context result = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU,
                                                NULL, NULL, &createStatus);
    if (createStatus == CL_SUCCESS)
        return result;

    // No GPU context: fall back to a CPU device on the same platform.
    std::cout << "Could not create GPU context, trying CPU..." << std::endl;
    result = clCreateContextFromType(properties, CL_DEVICE_TYPE_CPU,
                                     NULL, NULL, &createStatus);
    if (createStatus == CL_SUCCESS)
        return result;

    std::cerr << "Failed to create an OpenCL GPU or CPU context." << std::endl;
    return NULL;
}


// Creates a profiling-enabled command queue on the first device of `context`.
//
// context: OpenCL context whose device list is queried.
// device:  optional output; on success receives the id of the device the
//          queue was created for. Left untouched on failure.
//
// Returns the new command queue, or NULL on any failure (the reason is
// written to std::cerr).
cl_command_queue CreateCommandQueue(cl_context context, cl_device_id *device)
{
    // First query only the size (in bytes) of the context's device list.
    size_t deviceBufferSize = 0;
    cl_int errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &deviceBufferSize);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed call to clGetContextInfo(...,GL_CONTEXT_DEVICES,...)";
        return NULL;
    }

    if (deviceBufferSize < sizeof(cl_device_id))
    {
        std::cerr << "No devices available.";
        return NULL;
    }

    // Fetch the device list, keep only its first entry, and free the list
    // immediately so no later error path can leak it.
    cl_device_id *devices = new cl_device_id[deviceBufferSize / sizeof(cl_device_id)];
    errNum = clGetContextInfo(context, CL_CONTEXT_DEVICES, deviceBufferSize, devices, NULL);
    const cl_device_id firstDevice = (errNum == CL_SUCCESS) ? devices[0] : NULL;
    delete [] devices;

    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to get device IDs";
        return NULL;
    }

    // Profiling is enabled so callers can read kernel start/end timestamps.
    cl_command_queue commandQueue =
        clCreateCommandQueue(context, firstDevice, CL_QUEUE_PROFILING_ENABLE, NULL);
    if (commandQueue == NULL)
    {
        std::cerr << "Failed to create commandQueue for device 0";
        return NULL;
    }

    if (device != NULL)
        *device = firstDevice;
    return commandQueue;
}
 

 //  创建OpenCL程序对象
// Builds an OpenCL program from a source file.
//
// context:  OpenCL context the program is created in.
// device:   device whose build log is printed when the build fails.
// fileName: path of the OpenCL C source file.
//
// Returns the built program, or NULL when the file cannot be opened, the
// program object cannot be created, or the build fails. On a build failure
// the complete build log is written to std::cerr.
cl_program CreateProgram(cl_context context, cl_device_id device, const char* fileName)
{
    std::ifstream kernelFile(fileName, std::ios::in);
    if (!kernelFile.is_open())
    {
        std::cerr << "Failed to open file for reading: " << fileName << std::endl;
        return NULL;
    }

    // Read the whole file into memory.
    std::ostringstream oss;
    oss << kernelFile.rdbuf();
    const std::string srcStdStr = oss.str();
    const char *srcStr = srcStdStr.c_str();

    cl_int errNum = CL_SUCCESS;
    cl_program program = clCreateProgramWithSource(context, 1, &srcStr, NULL, &errNum);
    if (program == NULL || errNum != CL_SUCCESS)
    {
        std::cerr << "Failed to create CL program from source." << std::endl;
        if (program != NULL)
            clReleaseProgram(program);
        return NULL;
    }

    // Build for every device associated with the program.
    errNum = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        // Ask for the log length first so the log is never truncated and an
        // uninitialised buffer is never printed if the query itself fails.
        size_t logSize = 0;
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                              0, NULL, &logSize);
        std::string buildLog(logSize, '\0');
        if (logSize > 0)
        {
            clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                  logSize, &buildLog[0], NULL);
        }

        std::cerr << "Error in kernel: " << std::endl;
        std::cerr << buildLog.c_str();
        clReleaseProgram(program);
        return NULL;
    }

    return program;
}


// Rounds globalSize up to the nearest multiple of groupSize.
//
// groupSize:  work-group size; when it is not positive no rounding is
//             possible and globalSize is returned unchanged (this also avoids
//             a division by zero).
// globalSize: number of work-items actually needed.
//
// Returns the smallest multiple of groupSize that is >= globalSize.
size_t RoundUp(int groupSize, int globalSize)
{
    if (groupSize <= 0)
    {
        return static_cast<size_t>(globalSize);
    }

    const int remainder = globalSize % groupSize;
    return remainder == 0 ? globalSize : globalSize + groupSize - remainder;
}


// Releases every non-null OpenCL handle passed in: both entries of
// imageObjects first, then the command queue, kernel, program and context.
void Cleanup(cl_context context, cl_command_queue commandQueue,
             cl_program program, cl_kernel kernel, cl_mem imageObjects[2])
{
    cl_mem *const pastLast = imageObjects + 2;
    for (cl_mem *slot = imageObjects; slot != pastLast; ++slot)
    {
        if (*slot)
        {
            clReleaseMemObject(*slot);
        }
    }

    if (commandQueue)
    {
        clReleaseCommandQueue(commandQueue);
    }

    if (kernel)
    {
        clReleaseKernel(kernel);
    }

    if (program)
    {
        clReleaseProgram(program);
    }

    if (context)
    {
        clReleaseContext(context);
    }
}

int main()
{
    
    //initial varibles
    cl_int errNum=0;
    cl_device_id device = 0;
    cl_context context = 0;
    cl_program program = 0;
    cl_kernel kernel = 0;
    cl_command_queue commandQueue = 0;
    cl_sampler sampler = 0;
    cl_mem imageObjects[2] = { 0, 0 };
    
    int img_w = 3840;
    int img_h = 2176; 

    size_t read_size=img_w*img_h*3;

    size_t g_w=0;
    size_t g_h=0;
    
    std::string input_file,output_jpgfile;
    char *kernel_file=new char [100];
    char *kernel_name=new char [100];
   
    cl_float scale_w=0.5;
    cl_float scale_h=0.5;

    int new_w=0;
    int new_h=0;

    input_file="../data/test.raw";
    output_jpgfile="../data/06yuv2rgb_remap_resize.jpg";
    strcpy(kernel_file,"yuv2rgb_remap_resize.cl");
    strcpy(kernel_name,"kernel_yuv2rgb_remap_resize");
             
     // 1.选择platform,创建contex上下文
    context = CreateContext();
    if (context == NULL)
    {
        std::cerr << "Failed to create OpenCL context." << std::endl;
        return 1;
    }
    
    
    // 2.创建命令队列
    commandQueue = CreateCommandQueue(context, &device);
    if (commandQueue == NULL)
    {
        std::cerr <<"CreateCommandQueue failed"<<std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects);
        return 1;
    }
    
    
    // 3. 确保设备支持这种图像格式
    cl_bool imageSupport = CL_FALSE;
    clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool),
                    &imageSupport, NULL);
    if (imageSupport != CL_TRUE)
    {
        std::cerr << "OpenCL device does not support images." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects);
        
        return 1;
    }
    
     //4.准备输入数据 

    errNum=Load_image_from_NV12(input_file,imageObjects,context,img_h,img_w);
   
    if (errNum != CL_SUCCESS)
    { 
        std::cerr << "Load_image failed." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects);
        return 1;

    }

    // 5.创建输出的图像对象
   
    new_w=scale_w*img_w;
    new_h=scale_h*img_h;
    read_size=sizeof(u_char)*3*new_w*new_h;

    imageObjects[1] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
                                    read_size, NULL, &errNum);
    
     if (errNum != CL_SUCCESS)
    { 
        std::cerr << "create output imageobject failed." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects);
        return 1;

    }

    //6.创建OpenCL-program对象
    
    program = CreateProgram(context, device, kernel_file);
    if (program == NULL)
    {
        std::cerr <<"CreateProgram failed"<<std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects);
        return 1;
    }
    
    // 7.创建OpenCL核

    kernel = clCreateKernel(program, kernel_name, NULL);
    
    if (kernel == NULL)
    {
        std::cerr << "Failed to create kernel" << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects);
        return 1;
    }
 
  
    //8. 设定参数
   
        float map_x[img_w];
        float map_y[img_h];

        for (int i = 0; i < img_w; i++) 
        {
            map_x[i] = img_w - i - 1;
        }
        for (int j = 0; j < img_h; j++) 
        {
            map_y[j] = j;
        }

        int ioff=0;
        int hoff=0;

        cl_mem mem_mapx = clCreateBuffer(context, CL_MEM_READ_ONLY| CL_MEM_COPY_HOST_PTR, 
                                    sizeof(float)*img_w, &map_x, &errNum);

        cl_mem mem_mapy = clCreateBuffer(context, CL_MEM_READ_ONLY| CL_MEM_COPY_HOST_PTR, 
                                    sizeof(float)*img_h, &map_y, &errNum);
        
        errNum  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &imageObjects[0]);
        errNum |= clSetKernelArg(kernel, 1, sizeof(cl_int), &img_w);
        errNum |= clSetKernelArg(kernel, 2, sizeof(cl_int), &img_h);
        errNum |= clSetKernelArg(kernel, 3, sizeof(cl_mem) ,&mem_mapx);
        errNum |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &mem_mapy);
        errNum |= clSetKernelArg(kernel, 5, sizeof(cl_int) ,&ioff);
        errNum = clSetKernelArg(kernel, 6, sizeof(cl_int), &hoff);
        errNum |= clSetKernelArg(kernel, 7, sizeof(cl_float), &scale_w);
        errNum |= clSetKernelArg(kernel, 8, sizeof(cl_float), &scale_h);
        errNum |= clSetKernelArg(kernel, 9, sizeof(cl_mem), &imageObjects[1]);

    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error setting kernel arguments." << std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects);
        return 1;
    }
    
    g_w=new_w;
    g_h=new_h;

    size_t localWorkSize[2] = { 16, 16 };

    size_t globalWorkSize[2] =  { RoundUp(localWorkSize[0], g_w),
                                  RoundUp(localWorkSize[1], g_h) };
    
    //9.启动内核,内核执行完成后,会将evt置为CL_SUCCESS/CL_COMPLETE
    cl_event evt;
    
    errNum = clEnqueueNDRangeKernel(commandQueue, kernel,  
                               2, 0, globalWorkSize, localWorkSize, 
                               0, NULL, &evt);  
     if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error clEnqueueNDRangeKernel errNum=." <<errNum<< std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects);
        return 1;
    }

    clFinish(commandQueue);
    clWaitForEvents(1, &evt);   //等待命令事件发生
    
    cl_ulong time_start= (cl_ulong)0;
    cl_ulong time_end= (cl_ulong)0;
    double total_time;
    
    clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL);
    clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL);
    clReleaseEvent(evt);
    

    total_time =(double)( time_end - time_start);
    printf("\nExecution time in milliseconds = %f ms\n", (total_time / 1000000.0) );

    //10.读回数据
      
    u_char *read_data = new u_char [read_size];
    errNum =clEnqueueReadBuffer(commandQueue, imageObjects[1], 
                            CL_TRUE, 
                            0, 
                            read_size, 
                            read_data, 0, NULL, NULL);
    if (errNum != CL_SUCCESS)
    {
        std::cerr << "Error reading result buffer enume." <<errNum<< std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects);
        return 1;
    }
     
    if (read_data == NULL)
    {
        std::cerr << "Error reading result buffer null." <<errNum<< std::endl;
        Cleanup(context, commandQueue, program, kernel, imageObjects);
        return 1;
    }
    
    //显示图像
   
    Save_pic_by_opencv(output_jpgfile,read_data,new_h,new_w);
    
    Cleanup(context, commandQueue, program, kernel, imageObjects);
    
    return 0;
}

最后,GPU处理的kernel函数

4.yuv2rgb_remap_resize.cl

/*
 * BRD_CHECKS(x, y, X, Y, ybased, ubased, src_rgb)
 *
 * Border-checked NV12 -> RGB fetch. When (x, y) lies inside the X-by-Y image
 * the pixel is read from the luma plane `ybased` and the interleaved chroma
 * plane `ubased`, converted to RGB and stored into `src_rgb` (a uchar3).
 * When (x, y) is outside the image, `src_rgb` is left untouched.
 *
 * The conversion uses single-precision constants, so no double-precision
 * support is required, and each channel is saturated to [0, 255] instead of
 * wrapping when the result falls outside the uchar range.
 */
#define BRD_CHECKS(x, y, X, Y, ybased, ubased, src_rgb)                        \
    if ((x) >= 0 && (x) < (X) && (y) >= 0 && (y) < (Y)) {                      \
        const float brd_luma = (float)(ybased)[(x) + (y) * (X)];               \
        /* one chroma byte pair per 2x2 block of luma samples */               \
        const int brd_uv = ((y) / 2) * (X) + ((x) / 2) * 2;                    \
        const float brd_cb = (float)(ubased)[brd_uv] - 128.0f;                 \
        const float brd_cr = (float)(ubased)[brd_uv + 1] - 128.0f;             \
        (src_rgb) = convert_uchar3_sat((float3)(                               \
            brd_luma + 1.402f * brd_cr,                                        \
            brd_luma - 0.34413f * brd_cb - 0.71414f * brd_cr,                  \
            brd_luma + 1.772f * brd_cb));                                      \
    }

/*
 * Converts an NV12 frame to RGB while remapping and resizing it.
 * One work-item produces one output pixel.
 *
 * vinput:           NV12 frame: input_w*input_h luma bytes followed by the
 *                   interleaved chroma plane.
 * input_w, input_h: source frame size in pixels.
 * mapx, mapy:       lookup tables indexed by source column / source row that
 *                   give the source x / y coordinate to sample.
 * ioff:             offset added to the luma plane pointer.
 *                   NOTE(review): the chroma pointer is NOT shifted by ioff,
 *                   as in the original code - confirm this is intended.
 * bhoff:            offset added to the output row index.
 * scale_x, scale_y: output size = input size * scale.
 * vout:             output, 3 bytes (R,G,B) per pixel, out_w pixels per row.
 */
__kernel void kernel_yuv2rgb_remap_resize(const __global uchar *vinput,
        int input_w, int input_h,
        const __global float *mapx, const __global float *mapy,
        const int ioff,const int bhoff,
        float scale_x, float scale_y,__global uchar *vout){

    /* Output (w, h) coordinate handled by this work-item. */
    const int w = get_global_id(0);
    const int h = get_global_id(1);

    const int WIDTH = input_w;
    const int HEIGHT = input_h;

    const int out_w = input_w * scale_x;
    const int out_h = input_h * scale_y;

    /* The host may round the global size up to a multiple of the work-group
       size; the padding work-items must not read or write anything. */
    if (w >= out_w || h >= out_h) {
        return;
    }

    /* Plane pointers keep the const __global qualifiers of vinput. */
    const __global uchar *ybase = vinput + ioff;
    const __global uchar *ubase = vinput + WIDTH * HEIGHT;

    /* Source coordinate for this output pixel. The table index is clamped so
       float rounding in w/scale_x can never index past the tables. */
    const int map_col = min(max((int)(w / scale_x), 0), WIDTH - 1);
    const int map_row = min(max((int)(h / scale_y), 0), HEIGHT - 1);
    const float src_x = mapx[map_col];
    const float src_y = mapy[map_row];

    /* The four neighbouring source pixels. */
    const int x1 = convert_int_rtn(src_x);
    const int y1 = convert_int_rtn(src_y);
    const int x2 = x1 + 1;
    const int y2 = y1 + 1;

    const int bh = h + bhoff;

    /* Bilinear blend; neighbours outside the image contribute black. */
    float3 out = (float3)(0.0f);
    {
        uchar3 src_reg = 0;
        BRD_CHECKS(x1, y1, WIDTH, HEIGHT, ybase, ubase, src_reg);
        out += convert_float3(src_reg) * ((x2 - src_x) * (y2 - src_y));
    }
    {
        uchar3 src_reg = 0;
        BRD_CHECKS(x2, y1, WIDTH, HEIGHT, ybase, ubase, src_reg);
        out += convert_float3(src_reg) * ((src_x - x1) * (y2 - src_y));
    }
    {
        uchar3 src_reg = 0;
        BRD_CHECKS(x1, y2, WIDTH, HEIGHT, ybase, ubase, src_reg);
        out += convert_float3(src_reg) * ((x2 - src_x) * (src_y - y1));
    }
    {
        uchar3 src_reg = 0;
        BRD_CHECKS(x2, y2, WIDTH, HEIGHT, ybase, ubase, src_reg);
        out += convert_float3(src_reg) * ((src_x - x1) * (src_y - y1));
    }

    /* vstore3 addresses in units of 3 bytes, i.e. whole RGB pixels. */
    vstore3(convert_uchar3_sat(out), mad24(bh, out_w, w), vout);
}

  • 8
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值