文章目录
1. hwc 和 chw 的内存排布区别:
opencv: 原始数据排布是 hwc(交错存储); 注意 cv::imread 默认读入的是 bgr 顺序, 这里以 rgb 为例说明。假如是 width = 5, height = 3:
rgbrgbrgbrgbrgb
rgbrgbrgbrgbrgb
rgbrgbrgbrgbrgb
目标排布(chw): 假如是 width =5, height =3;
rrrrr
rrrrr
rrrrr
ggggg
ggggg
ggggg
bbbbb
bbbbb
bbbbb
2. 预处理参数封装
// Which normalization formula to apply during preprocessing.
enum class NormType : int{
    None = 0,      // no normalization
    MeanStd = 1,   // out = (x * alpha - mean) / std
    AlphaBeta = 2  // out = x * alpha + beta
};
// Whether to swap the channel order (e.g. BGR <-> RGB).
enum class ChannelType : int{
    None = 0,   // keep the source channel order
    Invert = 1  // swap R and B channels
};
/**
 * \brief Parameter pack for preprocessing: normalization constants plus
 *        channel-order handling, so one struct describes the whole step.
 *        All members carry identity defaults so that a default-constructed
 *        Norm is fully initialized (the original left the floats
 *        indeterminate, e.g. beta_ after Norm::mean_std()).
 */
struct Norm{
    float mean_[3] = {0.0f, 0.0f, 0.0f}; // per-channel mean (used by MeanStd)
    float std_[3] = {1.0f, 1.0f, 1.0f};  // per-channel std; 1.0 makes division a no-op
    float alpha_ = 1.0f;                 // scale applied to the raw pixel value
    float beta_ = 0.0f;                  // offset (used by AlphaBeta)
    NormType normType_ = NormType::None;
    ChannelType channelType_ = ChannelType::None;
    /**
     * \brief Static factory for mean/std standardization parameters; callable
     *        without an instance: Norm::mean_std(...).
     */
    // out = (x * alpha - mean) / std
    static Norm mean_std(const float mean[3], const float std[3], float alpha = 1/255.0f, ChannelType channel_type=ChannelType::None);
    /**
     * \brief Static factory for alpha/beta (min-max style) normalization
     *        parameters; callable without an instance: Norm::alpha_beta(...).
     */
    // out = x * alpha + beta
    static Norm alpha_beta(float alpha, float beta = 0, ChannelType channel_type=ChannelType::None);
    // Identity parameters: no normalization, no channel swap.
    static Norm None();
};
// 封装的话, 可以放到Cpp之中; 结构体的定义可以放到H文件之中
// Factory for mean/std standardization: out = (x * alpha - mean) / std.
// Lives in the .cpp; the struct definition belongs in the header.
Norm Norm::mean_std(const float mean[3], const float std[3], float alpha, ChannelType channel_type)
{
    Norm result;
    result.normType_ = NormType::MeanStd;
    result.channelType_ = channel_type;
    result.alpha_ = alpha;
    // Copy the three per-channel constants element by element.
    for (int i = 0; i < 3; ++i)
    {
        result.mean_[i] = mean[i];
        result.std_[i] = std[i];
    }
    return result;
}
// Factory for alpha/beta normalization: out = x * alpha + beta.
// mean/std are filled with the identity (0 / 1) so they are harmless if read.
Norm Norm::alpha_beta(float alpha, float beta, ChannelType channel_type)
{
    Norm result;
    result.normType_ = NormType::AlphaBeta;
    result.alpha_ = alpha;
    result.beta_ = beta;
    result.channelType_ = channel_type;
    for (int i = 0; i < 3; ++i)
    {
        result.mean_[i] = 0.0f;
        result.std_[i] = 1.0f;
    }
    return result;
}
// Identity parameters: a value-initialized Norm, i.e. NormType::None ("do nothing").
Norm Norm::None()
{
    Norm identity{};
    return identity;
}
3. 归一化、减均值除方差、hwc2chw的opencv实现封装;
/**
*@brief: out = (x * alpha - mean) / std, 标准化(减均值除方差)
* alpha 为 1;
*@return: 3通道
*/
void StandNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size)
{
cv::Mat img_cvtColr; // 需要的话, 进行通道转换, bgr2rgb 或者 rgb2bgr
cv::Mat img_convert; // 利用cv::Mat的 convertTo 进行归一化除以255.0f
cv::Mat norm_img; // 存储减均值除方之后的数据
cv::Mat img_resize; // 是否需要进行 resize
int nModelChannels = 3;
if (in.channelType_ == ChannelType::Invert)
{
cv::cvtColor(src_mat, img_cvtColr, cv::COLOR_BGR2RGB); // inplace
}
if (src_mat.cols != model_size.width || src_mat.rows != model_size.height)
{
//! 需要resize 的情况是 原图resize 还是 cvtColor的resize, 因此要判断是否需要 Invert
if (in.channelType_ == ChannelType::Invert)
{
cv::resize(img_cvtColr, img_resize, model_size);
}
else
{
cv::resize(src_mat, img_resize, model_size);
}
img_resize.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
//! 无需resize; 原图convert 还是 cvtColor的convert, 因此要判断是否需要 Invert
if (in.channelType_ == ChannelType::Invert)
{
img_cvtColr.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
src_mat.convertTo(img_convert, CV_32FC3, in.alpha_);
}
}
cv::Scalar mean(in.mean_[0], in.mean_[1], in.mean_[2]);
cv::Scalar std(in.std_[0], in.std_[1], in.std_[2]);
cv::Mat mean_mat(model_size, CV_32FC3, mean);
cv::Mat std_mat(model_size, CV_32FC3, std);
norm_img = (img_convert - mean_mat) / std_mat;
//! 如果模型输入是fp16数据类型, 则需要在这里进行转换
// cv:: fp16norm_img;
// norm_img.convertTo(fp16norm_img, CV_16FC3);
std::vector<cv::Mat> imgArray(nModelChannels);
//! chw -> hwc
cv::split(norm_img, imgArray);
//! 这里我们采用的是 flaot32 数据类型
size_t dst_plane_size = model_size.area() * sizeof(float);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer),
imgArray[0].data, imgArray[0].step.p[0]*imgArray[0].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + dst_plane_size,
imgArray[1].data, imgArray[1].step.p[0]*imgArray[1].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + 2*dst_plane_size,
imgArray[2].data, imgArray[2].step.p[0]*imgArray[2].size[0]);
return;
}
/**
*@brief: out = x * alpha + beta, 归一化
* alpha = 1/255
* beta 为 0;
*@return: 3通道
*/
void MaxMinNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size)
{
cv::Mat img_cvtColr;
cv::Mat img_convert;
cv::Mat norm_img;
cv::Mat img_resize;
int nModelChannels = 3;
if (in.channelType_ == ChannelType::Invert)
{
cv::cvtColor(src_mat, img_cvtColr, cv::COLOR_BGR2RGB); // inplace
}
if (src_mat.cols != model_size.width || src_mat.rows != model_size.height)
{
//! 需要resize 的情况是 原图resize 还是 cvtColor的resize
if (in.channelType_ == ChannelType::Invert)
{
cv::resize(img_cvtColr, img_resize, model_size);
}
else
{
cv::resize(src_mat, img_resize, model_size);
}
img_resize.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
//! 无需resize; 原图convert 还是 cvtColor的convert
if (in.channelType_ == ChannelType::Invert)
{
img_cvtColr.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
src_mat.convertTo(img_convert, CV_32FC3, in.alpha_);
}
}
//! 如果模型输入是fp16数据类型, 则需要在这里进行转换
// cv:: fp16norm_img;
// norm_img.convertTo(fp16norm_img, CV_16FC3);
std::vector<cv::Mat> imgArray(nModelChannels);
//! chw -> hwc
cv::split(img_convert, imgArray);
//! 这里我们采用的是 flaot32 数据类型
size_t dst_plane_size = model_size.area() * sizeof(float);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer),
imgArray[0].data, imgArray[0].step.p[0]*imgArray[0].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + dst_plane_size,
imgArray[1].data, imgArray[1].step.p[0]*imgArray[1].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + 2*dst_plane_size,
imgArray[2].data, imgArray[2].step.p[0]*imgArray[2].size[0]);
return;
}
4. 整合2、3的封装;可以直接拷贝走使用,强烈推荐完整版本,助你起飞
#include <memory.h>

#include <cassert>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

#include "opencv2/opencv.hpp"
// Which normalization formula to apply during preprocessing.
enum class NormType : int{
    None = 0,      // no normalization
    MeanStd = 1,   // out = (x * alpha - mean) / std
    AlphaBeta = 2  // out = x * alpha + beta
};
// Whether to swap the channel order (e.g. BGR <-> RGB).
enum class ChannelType : int{
    None = 0,   // keep the source channel order
    Invert = 1  // swap R and B channels
};
/**
 * \brief Parameter pack for preprocessing: normalization constants plus
 *        channel-order handling, so one struct describes the whole step.
 *        All members carry identity defaults so that a default-constructed
 *        Norm is fully initialized (the original left the floats
 *        indeterminate, e.g. beta_ after Norm::mean_std()).
 */
struct Norm{
    float mean_[3] = {0.0f, 0.0f, 0.0f}; // per-channel mean (used by MeanStd)
    float std_[3] = {1.0f, 1.0f, 1.0f};  // per-channel std; 1.0 makes division a no-op
    float alpha_ = 1.0f;                 // scale applied to the raw pixel value
    float beta_ = 0.0f;                  // offset (used by AlphaBeta)
    NormType normType_ = NormType::None;
    ChannelType channelType_ = ChannelType::None;
    /**
     * \brief Static factory for mean/std standardization parameters; callable
     *        without an instance: Norm::mean_std(...).
     */
    // out = (x * alpha - mean) / std
    static Norm mean_std(const float mean[3], const float std[3], float alpha = 1/255.0f, ChannelType channel_type=ChannelType::None);
    /**
     * \brief Static factory for alpha/beta (min-max style) normalization
     *        parameters; callable without an instance: Norm::alpha_beta(...).
     */
    // out = x * alpha + beta
    static Norm alpha_beta(float alpha, float beta = 0, ChannelType channel_type=ChannelType::None);
    // Identity parameters: no normalization, no channel swap.
    static Norm None();
};
/**
 * @brief Min-max style normalization + HWC->CHW: out = x * alpha + beta.
 *        Typical use: alpha = 1/255, beta = 0.
 *        Writes a 3-channel planar float32 tensor into model_input_buffer,
 *        which must hold at least model_size.area() * 3 floats.
 */
void MaxMinNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size);
/**
 * @brief Standardization + HWC->CHW: out = (x * alpha - mean) / std.
 *        Writes a 3-channel planar float32 tensor into model_input_buffer,
 *        which must hold at least model_size.area() * 3 floats.
 */
void StandNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size);
using namespace std;
int main(int argc, char* argv[]){
cv::Size model_size(224, 224);
string img_path = "/home/wkx/Cambricon-MLU270/history_img/demo_preprocess/demo/1.jpg";
auto img = cv::imread(img_path, cv::IMREAD_COLOR); //cv::IMREAD_COLOR 明确指定以 BGR形式读入图像
assert(!img.empty());
float mean_rgb[3] = {0.485, 0.456, 0.406};
float std_rgb[3] = {0.229, 0.224, 0.225};
Norm normalize_;
// ChannelType::None 表示不进行 BGR2RGB
normalize_ = Norm::mean_std(mean_rgb, std_rgb, 1/255.0f, ChannelType::None);
float* model_input_space = new float[model_size.area()*3];
//! 多batch的话, 可以自己进行偏移
StandNorm_c3(model_input_space, img, normalize_, model_size);
// 做完预处理, 如果你使用的trt你就可以将数据拷贝到 gpu进行推理了
// ... cudaMemcpy(dst, src, src_size, cudaMemcpyHostToDevice); //! 建议对返回值增加判断
// .. forward();
// 仅仅做归一化, 不做减均值除方差
// normalize_ = Norm::alpha_beta(1/255.0f, 0.0f, ChannelType::None);
// MaxMinNorm_c3(model_input_space, img, normalize_, model_size);
// ... cudaMemcpy(dst, src, src_size, cudaMemcpyHostToDevice); //! 建议对返回值增加判断
// .. forward();
delete [] model_input_space;
return 0;
}
// Factory for mean/std standardization: out = (x * alpha - mean) / std.
// Lives in the .cpp; the struct definition belongs in the header.
Norm Norm::mean_std(const float mean[3], const float std[3], float alpha, ChannelType channel_type)
{
    Norm result;
    result.normType_ = NormType::MeanStd;
    result.channelType_ = channel_type;
    result.alpha_ = alpha;
    // Copy the three per-channel constants element by element.
    for (int i = 0; i < 3; ++i)
    {
        result.mean_[i] = mean[i];
        result.std_[i] = std[i];
    }
    return result;
}
// Factory for alpha/beta normalization: out = x * alpha + beta.
// mean/std are filled with the identity (0 / 1) so they are harmless if read.
Norm Norm::alpha_beta(float alpha, float beta, ChannelType channel_type)
{
    Norm result;
    result.normType_ = NormType::AlphaBeta;
    result.alpha_ = alpha;
    result.beta_ = beta;
    result.channelType_ = channel_type;
    for (int i = 0; i < 3; ++i)
    {
        result.mean_[i] = 0.0f;
        result.std_[i] = 1.0f;
    }
    return result;
}
// Identity parameters: a value-initialized Norm, i.e. NormType::None ("do nothing").
Norm Norm::None()
{
    Norm identity{};
    return identity;
}
/**
*@brief: out = (x * alpha - mean) / std, 标准化(减均值除方差)
* alpha 为 1;
*@return: 3通道
*/
void StandNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size)
{
cv::Mat img_cvtColr; // 需要的话, 进行通道转换, bgr2rgb 或者 rgb2bgr
cv::Mat img_convert; // 利用cv::Mat的 convertTo 进行归一化除以255.0f
cv::Mat norm_img; // 存储减均值除方之后的数据
cv::Mat img_resize; // 是否需要进行 resize
int nModelChannels = 3;
if (in.channelType_ == ChannelType::Invert)
{
cv::cvtColor(src_mat, img_cvtColr, cv::COLOR_BGR2RGB); // inplace
}
if (src_mat.cols != model_size.width || src_mat.rows != model_size.height)
{
//! 需要resize 的情况是 原图resize 还是 cvtColor的resize, 因此要判断是否需要 Invert
if (in.channelType_ == ChannelType::Invert)
{
cv::resize(img_cvtColr, img_resize, model_size);
}
else
{
cv::resize(src_mat, img_resize, model_size);
}
img_resize.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
//! 无需resize; 原图convert 还是 cvtColor的convert, 因此要判断是否需要 Invert
if (in.channelType_ == ChannelType::Invert)
{
img_cvtColr.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
src_mat.convertTo(img_convert, CV_32FC3, in.alpha_);
}
}
cv::Scalar mean(in.mean_[0], in.mean_[1], in.mean_[2]);
cv::Scalar std(in.std_[0], in.std_[1], in.std_[2]);
cv::Mat mean_mat(model_size, CV_32FC3, mean);
cv::Mat std_mat(model_size, CV_32FC3, std);
norm_img = (img_convert - mean_mat) / std_mat;
//! 如果模型输入是fp16数据类型, 则需要在这里进行转换
// cv:: fp16norm_img;
// norm_img.convertTo(fp16norm_img, CV_16FC3);
std::vector<cv::Mat> imgArray(nModelChannels);
//! chw -> hwc
cv::split(norm_img, imgArray);
//! 这里我们采用的是 flaot32 数据类型
size_t dst_plane_size = model_size.area() * sizeof(float);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer),
imgArray[0].data, imgArray[0].step.p[0]*imgArray[0].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + dst_plane_size,
imgArray[1].data, imgArray[1].step.p[0]*imgArray[1].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + 2*dst_plane_size,
imgArray[2].data, imgArray[2].step.p[0]*imgArray[2].size[0]);
return;
}
/**
*@brief: out = x * alpha + beta, 归一化
* alpha = 1/255
* beta 为 0;
*@return: 3通道
*/
void MaxMinNorm_c3(void* model_input_buffer, cv::Mat& src_mat, Norm& in, cv::Size& model_size)
{
cv::Mat img_cvtColr;
cv::Mat img_convert;
cv::Mat norm_img;
cv::Mat img_resize;
int nModelChannels = 3;
if (in.channelType_ == ChannelType::Invert)
{
cv::cvtColor(src_mat, img_cvtColr, cv::COLOR_BGR2RGB); // inplace
}
if (src_mat.cols != model_size.width || src_mat.rows != model_size.height)
{
//! 需要resize 的情况是 原图resize 还是 cvtColor的resize
if (in.channelType_ == ChannelType::Invert)
{
cv::resize(img_cvtColr, img_resize, model_size);
}
else
{
cv::resize(src_mat, img_resize, model_size);
}
img_resize.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
//! 无需resize; 原图convert 还是 cvtColor的convert
if (in.channelType_ == ChannelType::Invert)
{
img_cvtColr.convertTo(img_convert, CV_32FC3, in.alpha_);
}
else
{
src_mat.convertTo(img_convert, CV_32FC3, in.alpha_);
}
}
//! 如果模型输入是fp16数据类型, 则需要在这里进行转换
// cv:: fp16norm_img;
// norm_img.convertTo(fp16norm_img, CV_16FC3);
std::vector<cv::Mat> imgArray(nModelChannels);
//! chw -> hwc
cv::split(img_convert, imgArray);
//! 这里我们采用的是 flaot32 数据类型
size_t dst_plane_size = model_size.area() * sizeof(float);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer),
imgArray[0].data, imgArray[0].step.p[0]*imgArray[0].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + dst_plane_size,
imgArray[1].data, imgArray[1].step.p[0]*imgArray[1].size[0]);
memcpy(reinterpret_cast<uint8_t*>(model_input_buffer) + 2*dst_plane_size,
imgArray[2].data, imgArray[2].step.p[0]*imgArray[2].size[0]);
return;
}
5.简单代码, 不建议使用;for循环实现 hwc --> chw
代码如下:
using namespace cv;
// Read (BGR), convert to RGB, and resize to the model input size.
Mat MatBGRImage = imread("wokspace/my_src.jpg");
auto t0 = iLogger::timestamp_now_float();
Mat RGBImg, ResizeImg;
cvtColor(MatBGRImage, RGBImg, COLOR_BGR2RGB);
cv::resize(RGBImg, ResizeImg, Size(224, 224));
int channels = ResizeImg.channels(), height = ResizeImg.rows, width = ResizeImg.cols;
float* nchwMat = (float*)malloc(channels * height * width * sizeof(float));
memset(nchwMat, 0, channels * height * width * sizeof(float));
// Per-channel normalization constants (RGB order, ImageNet statistics).
float mean_rgb[3] = {0.485, 0.456, 0.406};
float std_rgb[3] = {0.229, 0.224, 0.225};
uint8_t* ptMat = ResizeImg.ptr<uint8_t>(0);
int area = height * width;
// Convert HWC to CHW and normalize:
//   dst[c][h][w] = (src[h][w][c] / 255 - mean[c]) / std[c]
// BUG FIX: the original indexed the interleaved source as if it were planar
// (srcIdx = c*area + ...) and its inner i-loop (divider = srcIdx/3) overwrote
// every destination plane with the same source channel, producing wrong data.
for (int c = 0; c < channels; ++c)
{
    for (int h = 0; h < height; ++h)
    {
        for (int w = 0; w < width; ++w)
        {
            int srcIdx = (h * width + w) * channels + c; // interleaved HWC source
            int dstIdx = c * area + h * width + w;       // planar CHW destination
            nchwMat[dstIdx] = (ptMat[srcIdx] * (1.0f / 255.0f) - mean_rgb[c]) / std_rgb[c];
        }
    }
}
// Copy to the GPU, one copy per batch slot.
size_t Src_size = 3 * 224 * 224 * sizeof(float);
for (int i = 0; i < explicit_batch; ++i)
{
    // NOTE(review): assumes pValue is a byte pointer so `+ i * Src_size`
    // offsets in bytes — confirm against the tensor type's declaration.
    cudaMemcpy(static_cast<float*>(input_tensor_image->pValue + i * Src_size),
               nchwMat, Src_size, cudaMemcpyHostToDevice);
}
free(nchwMat);
auto t1 = iLogger::timestamp_now_float();
auto ms0 = t1 - t0;
printf("preprocess time: %.3f ms\n", ms0);
PS(可以的话,请关注博主一波):
如果你觉得我的代码有帮助到你,请关注一波,非常感谢。
使用本教程遇到问题无法解决的话,可以在私信联系我或者在本文章下方进行评论(一般24小时内回复),我会尽我所能帮助你解决问题。