文章目录
1. hwc 和 chw 的内存排布区别:
opencv: 原始数据排布是 hwc(交错存储); 注意 cv::imread 默认读入的是 bgr 顺序, 这里以 rgb 为例说明。假如是 width = 5, height = 3:
rgbrgbrgbrgbrgb
rgbrgbrgbrgbrgb
rgbrgbrgbrgbrgb
目标排布(chw): 假如是 width =5, height =3;
rrrrr
rrrrr
rrrrr
ggggg
ggggg
ggggg
bbbbb
bbbbb
bbbbb
2. 预处理参数封装
// Which normalization formula to apply during preprocessing.
enum class NormType : int{
    None = 0,      // no normalization
    MeanStd = 1,   // out = (x * alpha - mean) / std
    AlphaBeta = 2  // out = x * alpha + beta
};
// Whether to swap the channel order (e.g. BGR <-> RGB).
enum class ChannelType : int{
    None = 0,   // keep the source channel order
    Invert = 1  // swap R and B channels
};
/**
 * \brief Parameter pack for preprocessing: normalization constants plus
 *        channel-order handling, so one struct describes the whole step.
 *        All members carry identity defaults so that a default-constructed
 *        Norm is fully initialized (the original left the floats
 *        indeterminate, e.g. beta_ after Norm::mean_std()).
 */
struct Norm{
    float mean_[3] = {0.0f, 0.0f, 0.0f}; // per-channel mean (used by MeanStd)
    float std_[3] = {1.0f, 1.0f, 1.0f};  // per-channel std; 1.0 makes division a no-op
    float alpha_ = 1.0f;                 // scale applied to the raw pixel value
    float beta_ = 0.0f;                  // offset (used by AlphaBeta)
    NormType normType_ = NormType::None;
    ChannelType channelType_ = ChannelType::None;
    /**
     * \brief Static factory for mean/std standardization parameters; callable
     *        without an instance: Norm::mean_std(...).
     */
    // out = (x * alpha - mean) / std
    static Norm mean_std(const float mean[3], const float std[3], float alpha = 1/255.0f, ChannelType channel_type=ChannelType::None);
    /**
     * \brief Static factory for alpha/beta (min-max style) normalization
     *        parameters; callable without an instance: Norm::alpha_beta(...).
     */
    // out = x * alpha + beta
    static Norm alpha_beta(float alpha, float beta = 0, ChannelType channel_type=ChannelType::None);
    // Identity parameters: no normalization, no channel swap.
    static Norm None();
};
// 封装的话, 可以放到Cpp之中; 结构体的定义可以放到H文件之中
// Factory for mean/std standardization: out = (x * alpha - mean) / std.
// Lives in the .cpp; the struct definition belongs in the header.
Norm Norm::mean_std(const float mean[3], const float std[3], float alpha, ChannelType channel_type)
{
    Norm result;
    result.normType_ = NormType::MeanStd;
    result.channelType_ = channel_type;
    result.alpha_ = alpha;
    // Copy the three per-channel constants element by element.
    for (int i = 0; i < 3; ++i)
    {
        result.mean_[i] = mean[i];
        result.std_[i] = std[i];
    }
    return result;
}
// Factory for alpha/beta normalization: out = x * alpha + beta.
// mean/std are filled with the identity (0 / 1) so they are harmless if read.
Norm Norm::alpha_beta(float alpha, float beta, ChannelType channel_type)
{
    Norm result;
    result.normType_ = NormType::AlphaBeta;
    result.alpha_ = alpha;
    result.beta_ = beta;
    result.channelType_ = channel_type;
    for (int i = 0; i < 3; ++i)
    {
        result.mean_[i] = 0.0f;
        result.std_[i] = 1.0f;
    }
    return result;
}
// Identity parameters: a value-initialized Norm, i.e. NormType::None ("do nothing").
Norm Norm::None()
{
    Norm identity{};
    return identity;
}
3. 归一化、减均值除方差、hwc2chw的opencv实现封装;
/**
*@brief: out = (x * alpha - mean) / std, 标准化(减均值除方差)
* alpha 为 1;
*@return: 3通道
*/
void StandNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size)
{
cv::Mat img_cvtColr; // 需要的话, 进行通道转换, bgr2rgb 或者 rgb2bgr
cv::Mat img_convert; // 利用cv::Mat的 convertTo 进行归一化除以255.0f
cv::Mat norm_img; // 存储减均值除方之后的数据
cv::Mat img_resize; // 是否需要进行 resize
int nModelChannels = 3;
if (in.channelType_ == ChannelType::Invert)
{
cv::cvtColor(src_mat, img_cvtColr, cv::COLOR_BGR2RGB); // inplace
}
if (src_mat.cols != model_size.width || src_mat.rows != model_size.height)
{
//! 需要resize 的情况是 原图resize 还是 cvtColor的resize, 因此要判断是否需要 Invert
if (in.channelType_ == ChannelType::Invert)
{
cv::resize(img_cvtColr, img_resize, model_size);
}
else
{
cv::resize(src_mat, img_resize, model_size);
}
img_resize.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
//! 无需resize; 原图convert 还是 cvtColor的convert, 因此要判断是否需要 Invert
if (in.channelType_ == ChannelType::Invert)
{
img_cvtColr.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
src_mat.convertTo(img_convert, CV_32FC3, in.alpha_);
}
}
cv::Scalar mean(in.mean_[0], in.mean_[1], in.mean_[2]);
cv::Scalar std(in.std_[0], in.std_[1], in.std_[2]);
cv::Mat mean_mat(model_size, CV_32FC3, mean);
cv::Mat std_mat(model_size, CV_32FC3, std);
norm_img = (img_convert - mean_mat) / std_mat;
//! 如果模型输入是fp16数据类型, 则需要在这里进行转换
// cv:: fp16norm_img;
// norm_img.convertTo(fp16norm_img, CV_16FC3);
std::vector<cv::Mat> imgArray(nModelChannels);
//! chw -> hwc
cv::split(norm_img, imgArray);
//! 这里我们采用的是 flaot32 数据类型
size_t dst_plane_size = model_size.area() * sizeof(float);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer),
imgArray[0].data, imgArray[0].step.p[0]*imgArray[0].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + dst_plane_size,
imgArray[1].data, imgArray[1].step.p[0]*imgArray[1].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + 2*dst_plane_size,
imgArray[2].data, imgArray[2].step.p[0]*imgArray[2].size[0]);
return;
}
/**
*@brief: out = x * alpha + beta, 归一化
* alpha = 1/255
* beta 为 0;
*@return: 3通道
*/
void MaxMinNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size)
{
cv::Mat img_cvtColr;
cv::Mat img_convert;
cv::Mat norm_img;
cv::Mat img_resize;
int nModelChannels = 3;
if (in.channelType_ == ChannelType::Invert)
{
cv::cvtColor(src_mat, img_cvtColr, cv::COLOR_BGR2RGB); // inplace
}
if (src_mat.cols != model_size.width || src_mat.rows != model_size.height)
{
//! 需要resize 的情况是 原图resize 还是 cvtColor的resize
if (in.channelType_ == ChannelType::Invert)
{
cv::resize(img_cvtColr, img_resize, model_size);
}
else
{
cv::resize(src_mat, img_resize, model_size);
}
img_resize.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
//! 无需resize; 原图convert 还是 cvtColor的convert
if (in.channelType_ == ChannelType::Invert)
{
img_cvtColr.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
src_mat.convertTo(img_convert, CV_32FC3, in.alpha_);
}
}
//! 如果模型输入是fp16数据类型, 则需要在这里进行转换
// cv:: fp16norm_img;
// norm_img.convertTo(fp16norm_img, CV_16FC3);
std::vector<cv::Mat> imgArray(nModelChannels);
//! chw -> hwc
cv::split(img_convert, imgArray);
//! 这里我们采用的是 flaot32 数据类型
size_t dst_plane_size = model_size.area() * sizeof(float);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer),
imgArray[0].data, imgArray[0].step.p[0]*imgArray[0].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + dst_plane_size,
imgArray[1].data, imgArray[1].step.p[0]*imgArray[1].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + 2*dst_plane_size,
imgArray[2].data, imgArray[2].step.p[0]*imgArray[2].size[0]);
return;
}
4. 整合2、3的封装;可以直接拷贝走使用,强烈推荐完整版本,助你起飞
#include <memory.h>

#include <cassert>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

#include "opencv2/opencv.hpp"
// Which normalization formula to apply during preprocessing.
enum class NormType : int{
    None = 0,      // no normalization
    MeanStd = 1,   // out = (x * alpha - mean) / std
    AlphaBeta = 2  // out = x * alpha + beta
};
// Whether to swap the channel order (e.g. BGR <-> RGB).
enum class ChannelType : int{
    None = 0,   // keep the source channel order
    Invert = 1  // swap R and B channels
};
/**
 * \brief Parameter pack for preprocessing: normalization constants plus
 *        channel-order handling, so one struct describes the whole step.
 *        All members carry identity defaults so that a default-constructed
 *        Norm is fully initialized (the original left the floats
 *        indeterminate, e.g. beta_ after Norm::mean_std()).
 */
struct Norm{
    float mean_[3] = {0.0f, 0.0f, 0.0f}; // per-channel mean (used by MeanStd)
    float std_[3] = {1.0f, 1.0f, 1.0f};  // per-channel std; 1.0 makes division a no-op
    float alpha_ = 1.0f;                 // scale applied to the raw pixel value
    float beta_ = 0.0f;                  // offset (used by AlphaBeta)
    NormType normType_ = NormType::None;
    ChannelType channelType_ = ChannelType::None;
    /**
     * \brief Static factory for mean/std standardization parameters; callable
     *        without an instance: Norm::mean_std(...).
     */
    // out = (x * alpha - mean) / std
    static Norm mean_std(const float mean[3], const float std[3], float alpha = 1/255.0f, ChannelType channel_type=ChannelType::None);
    /**
     * \brief Static factory for alpha/beta (min-max style) normalization
     *        parameters; callable without an instance: Norm::alpha_beta(...).
     */
    // out = x * alpha + beta
    static Norm alpha_beta(float alpha, float beta = 0, ChannelType channel_type=ChannelType::None);
    // Identity parameters: no normalization, no channel swap.
    static Norm None();
};
/**
 * @brief Min-max style normalization + HWC->CHW: out = x * alpha + beta.
 *        Typical use: alpha = 1/255, beta = 0.
 *        Writes a 3-channel planar float32 tensor into model_input_buffer,
 *        which must hold at least model_size.area() * 3 floats.
 */
void MaxMinNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size);
/**
 * @brief Standardization + HWC->CHW: out = (x * alpha - mean) / std.
 *        Writes a 3-channel planar float32 tensor into model_input_buffer,
 *        which must hold at least model_size.area() * 3 floats.
 */
void StandNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size);
using namespace std;
int main(int argc, char* argv[]){
cv::Size model_size(224, 224);
string img_path = "/home/wkx/Cambricon-MLU270/history_img/demo_preprocess/demo/1.jpg";
auto img = cv::imread(img_path, cv::IMREAD_COLOR); //cv::IMREAD_COLOR 明确指定以 BGR形式读入图像
assert(!img.empty());
float mean_rgb[3] = {0.485, 0.456, 0.406};
float std_rgb[3] = {0.229, 0.224, 0.225};
Norm normalize_;
// ChannelType::None 表示不进行 BGR2RGB
normalize_ = Norm::mean_std(mean_rgb, std_rgb, 1/255.0f, ChannelType::None);
float* model_input_space = new float[model_size.area()*3];
//! 多batch的话, 可以自己进行偏移
StandNorm_c3(model_input_space, img, normalize_, model_size);
// 做完预处理, 如果你使用的trt你就可以将数据拷贝到 gpu进行推理了
// ... cudaMemcpy(dst, src, src_size, cudaMemcpyHostToDevice); //! 建议对返回值增加判断
// .. forward();
// 仅仅做归一化, 不做减均值除方差
// normalize_ = Norm::alpha_beta(1/255.0f, 0.0f, ChannelType::None);
// MaxMinNorm_c3(model_input_space, img, normalize_, model_size);
// ... cudaMemcpy(dst, src, src_size, cudaMemcpyHostToDevice); //! 建议对返回值增加判断
// .. forward();
delete [] model_input_space;
return 0;
}
// Factory for mean/std standardization: out = (x * alpha - mean) / std.
// Lives in the .cpp; the struct definition belongs in the header.
Norm Norm::mean_std(const float mean[3], const float std[3], float alpha, ChannelType channel_type)
{
    Norm result;
    result.normType_ = NormType::MeanStd;
    result.channelType_ = channel_type;
    result.alpha_ = alpha;
    // Copy the three per-channel constants element by element.
    for (int i = 0; i < 3; ++i)
    {
        result.mean_[i] = mean[i];
        result.std_[i] = std[i];
    }
    return result;
}
// Factory for alpha/beta normalization: out = x * alpha + beta.
// mean/std are filled with the identity (0 / 1) so they are harmless if read.
Norm Norm::alpha_beta(float alpha, float beta, ChannelType channel_type)
{
    Norm result;
    result.normType_ = NormType::AlphaBeta;
    result.alpha_ = alpha;
    result.beta_ = beta;
    result.channelType_ = channel_type;
    for (int i = 0; i < 3; ++i)
    {
        result.mean_[i] = 0.0f;
        result.std_[i] = 1.0f;
    }
    return result;
}
// Identity parameters: a value-initialized Norm, i.e. NormType::None ("do nothing").
Norm Norm::None()
{
    Norm identity{};
    return identity;
}
/**
*@brief: out = (x * alpha - mean) / std, 标准化(减均值除方差)
* alpha 为 1;
*@return: 3通道
*/
void StandNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size)
{
cv::Mat img_cvtColr; // 需要的话, 进行通道转换, bgr2rgb 或者 rgb2bgr
cv::Mat img_convert; // 利用cv::Mat的 convertTo 进行归一化除以255.0f
cv::Mat norm_img; // 存储减均值除方之后的数据
cv::Mat img_resize; // 是否需要进行 resize
int nModelChannels = 3;
if (in.channelType_ == ChannelType::Invert)
{
cv::cvtColor(src_mat, img_cvtColr, cv::COLOR_BGR2RGB); // inplace
}
if (src_mat.cols != model_size.width || src_mat.rows != model_size.height)
{
//! 需要resize 的情况是 原图resize 还是 cvtColor的resize, 因此要判断是否需要 Invert
if (in.channelType_ == ChannelType::Invert)
{
cv::resize(img_cvtColr, img_resize, model_size);
}
else
{
cv::resize(src_mat, img_resize, model_size);
}
img_resize.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
//! 无需resize; 原图convert 还是 cvtColor的convert, 因此要判断是否需要 Invert
if (in.channelType_ == ChannelType::Invert)
{
img_cvtColr.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
src_mat.convertTo(img_convert, CV_32FC3, in.alpha_);
}
}
cv::Scalar mean(in.mean_[0], in.mean_[1], in.mean_[2]);
cv::Scalar std(in.std_[0], in.std_[1], in.std_[2]);
cv::Mat mean_mat(model_size, CV_32FC3, mean);
cv::Mat std_mat(model_size, CV_32FC3, std);
norm_img = (img_convert - mean_mat) / std_mat;
//! 如果模型输入是fp16数据类型, 则需要在这里进行转换
// cv:: fp16norm_img;
// norm_img.convertTo(fp16norm_img, CV_16FC3);
std::vector<cv::Mat> imgArray(nModelChannels);
//! chw -> hwc
cv::split(norm_img, imgArray);
//! 这里我们采用的是 flaot32 数据类型
size_t dst_plane_size = model_size.area() * sizeof(float);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer),
imgArray[0].data, imgArray[0].step.p[0]*imgArray[0].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + dst_plane_size,
imgArray[1].data, imgArray[1].step.p[0]*imgArray[1].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + 2*dst_plane_size,
imgArray[2].data, imgArray[2].step.p[0]*imgArray[2].size[0]);
return;
}
/**
*@brief: out = x * alpha + beta, 归一化
* alpha = 1/255
* beta 为 0;
*@return: 3通道
*/
void MaxMinNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size)
{
cv::Mat img_cvtColr;
cv::Mat img_convert;
cv::Mat norm_img;
cv::Mat img_resize;
int nModelChannels = 3;
if (in.channelType_ == ChannelType::Invert)
{
cv::cvtColor(src_mat, img_cvtColr, cv::COLOR_BGR2RGB); // inplace
}
if (src_mat.cols != model_size.width || src_mat.rows != model_size.height)
{
//! 需要resize 的情况是 原图resize 还是 cvtColor的resize
if (in.channelType_ == ChannelType::Invert)
{
cv::resize(img_cvtColr, img_resize, model_size);
}
else
{
cv::resize(src_mat, img_resize, model_size);
}
img_resize.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
//! 无需resize; 原图convert 还是 cvtColor的convert
if (in.channelType_ == ChannelType::Invert)
{
img_cvtColr.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
src_mat.convertTo(img_convert, CV_32FC3, in.alpha_);
}
}
//! 如果模型输入是fp16数据类型, 则需要在这里进行转换
// cv:: fp16norm_img;
// norm_img.convertTo(fp16norm_img, CV_16FC3);
std::vector<cv::Mat> imgArray(nModelChannels);
//! chw -> hwc
cv::split(img_convert, imgArray);
//! 这里我们采用的是 flaot32 数据类型
size_t dst_plane_size = model_size.area() * sizeof(float);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer),
imgArray[0].data, imgArray[0].step.p[0]*imgArray[0].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + dst_plane_size,
imgArray[1].data, imgArray[1].step.p[0]*imgArray[1].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + 2*dst_plane_size,
imgArray[2].data, imgArray[2].step.p[0]*imgArray[2].size[0]);
return;
}
5.简单代码, 不建议使用;for循环实现 hwc --> chw
代码如下:
using namespace cv;
// Read (BGR), convert to RGB, and resize to the model input size.
Mat MatBGRImage = imread("wokspace/my_src.jpg");
auto t0 = iLogger::timestamp_now_float();
Mat RGBImg, ResizeImg;
cvtColor(MatBGRImage, RGBImg, COLOR_BGR2RGB);
cv::resize(RGBImg, ResizeImg, Size(224, 224));
int channels = ResizeImg.channels(), height = ResizeImg.rows, width = ResizeImg.cols;
float* nchwMat = (float*)malloc(channels * height * width * sizeof(float));
memset(nchwMat, 0, channels * height * width * sizeof(float));
// Per-channel normalization constants (RGB order, ImageNet statistics).
float mean_rgb[3] = {0.485, 0.456, 0.406};
float std_rgb[3] = {0.229, 0.224, 0.225};
uint8_t* ptMat = ResizeImg.ptr<uint8_t>(0);
int area = height * width;
// Convert HWC to CHW and normalize:
//   dst[c][h][w] = (src[h][w][c] / 255 - mean[c]) / std[c]
// BUG FIX: the original indexed the interleaved source as if it were planar
// (srcIdx = c*area + ...) and its inner i-loop (divider = srcIdx/3) overwrote
// every destination plane with the same source channel, producing wrong data.
for (int c = 0; c < channels; ++c)
{
    for (int h = 0; h < height; ++h)
    {
        for (int w = 0; w < width; ++w)
        {
            int srcIdx = (h * width + w) * channels + c; // interleaved HWC source
            int dstIdx = c * area + h * width + w;       // planar CHW destination
            nchwMat[dstIdx] = (ptMat[srcIdx] * (1.0f / 255.0f) - mean_rgb[c]) / std_rgb[c];
        }
    }
}
// Copy to the GPU, one copy per batch slot.
size_t Src_size = 3 * 224 * 224 * sizeof(float);
for (int i = 0; i < explicit_batch; ++i)
{
    // NOTE(review): assumes pValue is a byte pointer so `+ i * Src_size`
    // offsets in bytes — confirm against the tensor type's declaration.
    cudaMemcpy(static_cast<float*>(input_tensor_image->pValue + i * Src_size),
               nchwMat, Src_size, cudaMemcpyHostToDevice);
}
free(nchwMat);
auto t1 = iLogger::timestamp_now_float();
auto ms0 = t1 - t0;
printf("preprocess time: %.3f ms\n", ms0);
PS(可以的话,请关注博主一波):
如果你觉得我的代码有帮助到你,请关注一波,非常感谢。
使用本教程遇到问题无法解决的话,可以在私信联系我或者在本文章下方进行评论(一般24小时内回复),我会尽我所能帮助你解决问题。