Table of Contents
1. Common image preprocessing implementations
2. Accelerating image preprocessing with OpenCV CUDA
References
1. Common image preprocessing implementations
Unlike the training stage, which applies all kinds of data augmentation, the preprocessing used at inference time is typically the fixed pipeline below:
from PIL import Image
from torchvision import transforms as T

# 1. read image (PIL loads RGB pixel data in HWC layout)
img = Image.open("image-path.jpg").convert('RGB')

# 2. image preprocessing
# resize_size can be an int or a tuple, e.g. 448 or (448, 448)
resize_size = 448
transform = T.Compose([T.Resize(resize_size, interpolation=T.InterpolationMode.BILINEAR),
                       T.ToTensor(),
                       T.Normalize(mean=[0.485, 0.456, 0.406],
                                   std=[0.229, 0.224, 0.225])])
img = transform(img)
Breaking the preprocessing pipeline down step by step:
Image.open("image-path.jpg").convert("RGB")
Reading the image with PIL yields pixel data in HWC layout with the channels in RGB order (unlike OpenCV, which loads BGR).
Resize(resize_size, interpolation=T.InterpolationMode.BILINEAR)
This scales the input image via interpolation so it matches the network's input size; bilinear interpolation is the usual choice (and torchvision's default).
ToTensor()
This converts a PIL Image or numpy array into a PyTorch Tensor, rearranging the layout from HWC to CHW and scaling pixel values from [0, 255] to [0, 1] by computing "pixel value / 255".
Normalize()
This standardizes the input channel by channel. The formula is:
output[c] = (input[c] - mean[c]) / std[c]
The sketch below shows that ToTensor plus Normalize is equivalent to doing these operations by hand.
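As a sanity check, here is a minimal sketch that reproduces ToTensor and Normalize manually with numpy and compares the result against torchvision (a synthetic random image stands in for a real file so the snippet runs standalone):

import numpy as np
import torch
from PIL import Image
from torchvision import transforms as T

# synthetic 448x448 RGB image, standing in for a real file
img = Image.fromarray(np.random.randint(0, 256, (448, 448, 3), dtype=np.uint8))

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
out_transform = T.Compose([T.ToTensor(),
                           T.Normalize(mean=mean, std=std)])(img)

# manual equivalent: uint8 HWC -> float CHW in [0, 1] -> (x - mean) / std
arr = np.asarray(img).astype(np.float32) / 255.0   # ToTensor: scale to [0, 1]
arr = arr.transpose(2, 0, 1)                       # ToTensor: HWC -> CHW
arr = (arr - np.reshape(mean, (3, 1, 1))) / np.reshape(std, (3, 1, 1))
out_manual = torch.from_numpy(arr).float()

print(torch.allclose(out_transform, out_manual, atol=1e-6))  # True

These are exactly the operations the C++ code in the next section reimplements on the GPU.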
2. Accelerating image preprocessing with OpenCV CUDA
// -------------- opencv ----------------------- #
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
// ---------------- opencv-cuda ---------------- #
#include <opencv2/cudawarping.hpp>
#include <opencv2/cudaarithm.hpp>
#include <opencv2/cudaimgproc.hpp>
// ------------ cuda ------------------------- #
#include <cuda_runtime_api.h>
// ------------------- nvinfer1 ------------------ #
#include "NvInfer.h"
// ------------ standard libraries --------------- #
#include <iostream>
#include <assert.h>
#include <string>
#include <vector>
// ---------------------------------------------- #
void preprocessImage(const std::string& image_path, float* gpu_input,
                     nvinfer1::Dims3& dims)
{
    // read image
    cv::Mat frame = cv::imread(image_path);
    if (frame.empty())
    {
        std::cerr << "failed to load image: " << image_path << "!" << std::endl;
        return;
    }
    // upload to GPU
    cv::cuda::GpuMat gpu_frame;
    gpu_frame.upload(frame);
    // resize to the network input size (dims are in CHW order)
    auto input_width = dims.d[2];
    auto input_height = dims.d[1];
    auto channels = dims.d[0];
    auto input_size = cv::Size(input_width, input_height);
    cv::cuda::GpuMat resized;
    cv::cuda::resize(gpu_frame, resized, input_size, 0, 0, cv::INTER_LINEAR);
    // BGR -> RGB (OpenCV loads images in BGR order, the network expects RGB;
    // this must happen before normalization so the RGB means/stds line up)
    cv::cuda::GpuMat rgb;
    cv::cuda::cvtColor(resized, rgb, cv::COLOR_BGR2RGB);
    /* ---------------- PyTorch ToTensor and Normalize ---------------- */
    cv::cuda::GpuMat flt_image;
    rgb.convertTo(flt_image, CV_32FC3, 1.f / 255.f);
    cv::cuda::subtract(flt_image, cv::Scalar(0.485f, 0.456f, 0.406f), flt_image,
                       cv::noArray(), -1);
    cv::cuda::divide(flt_image, cv::Scalar(0.229f, 0.224f, 0.225f), flt_image, 1, -1);
    /* ----------------------------------------------------------------- */
    // ToTensor (copy the data to the input float pointer channel by channel):
    // each GpuMat header wraps one channel-sized slice of gpu_input, so
    // split() writes the HWC image directly into CHW layout.
    std::vector<cv::cuda::GpuMat> rgb_out;
    for (int i = 0; i < channels; ++i)
    {
        rgb_out.emplace_back(cv::Size(input_width, input_height), CV_32FC1,
                             gpu_input + i * input_width * input_height);
    }
    cv::cuda::split(flt_image, rgb_out); // OpenCV HWC order -> CHW order
}
// calculate the number of elements in a tensor
size_t getSizeByDim(const nvinfer1::Dims& dims)
{
    size_t size = 1;
    for (int i = 0; i < dims.nbDims; ++i)
    {
        size *= dims.d[i];
    }
    return size;
}
int main()
{
    std::string image_path = "../00.jpg";
    // CHW order
    nvinfer1::Dims3 input_dim(3, 448, 448);
    auto input_size = getSizeByDim(input_dim) * sizeof(float);
    // allocate GPU memory for the network input;
    // this buffer plays the role of the input device memory a TensorRT
    // engine would read from during inference
    std::vector<void*> buffers(1);
    cudaMalloc(&buffers[0], input_size);
    // preprocess
    preprocessImage(image_path, (float*)buffers[0], input_dim);
    // wrap the CHW planes in the buffer and merge them back into an HWC image
    cv::cuda::GpuMat gpu_output;
    std::vector<cv::cuda::GpuMat> planes;
    for (int i = 0; i < 3; ++i)
    {
        planes.emplace_back(cv::Size(input_dim.d[2], input_dim.d[1]), CV_32FC1,
                            (float*)buffers[0] + i * input_dim.d[2] * input_dim.d[1]);
    }
    cv::cuda::merge(planes, gpu_output);
    cv::cuda::GpuMat image_out;
    // scale back to [0, 255] and convert to 8-bit so imwrite can save it
    gpu_output.convertTo(image_out, CV_8UC3, 255.f);
    // download
    cv::Mat dst;
    image_out.download(dst);
    // the buffer holds RGB planes, but imwrite expects BGR
    cv::cvtColor(dst, dst, cv::COLOR_RGB2BGR);
    cv::imwrite("../01_test_demo.jpg", dst);
    for (void* buf : buffers)
    {
        cudaFree(buf);
    }
    return 0;
}
[Note] When testing the code above, you can comment out the subtract and divide operations and check whether the output image matches a plain resize of the input.
Also be aware that OpenCV's CPU and GPU resize implementations differ slightly; this was noticed while debugging the program, and can be verified with the sketch below.
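A minimal sketch of that check, in Python for brevity (it assumes an OpenCV build with the CUDA modules enabled; "00.jpg" is the test image from the code above):

import cv2

frame = cv2.imread("00.jpg")

# resize on the CPU
cpu_resized = cv2.resize(frame, (448, 448), interpolation=cv2.INTER_LINEAR)

# resize on the GPU
gpu_frame = cv2.cuda_GpuMat()
gpu_frame.upload(frame)
gpu_resized = cv2.cuda.resize(gpu_frame, (448, 448),
                              interpolation=cv2.INTER_LINEAR).download()

# the per-pixel difference is typically small but non-zero
print(cv2.absdiff(cpu_resized, gpu_resized).max())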
References
Is there a way to feed OpenCV cv::cuda::GpuMat data to TensorRT for GPU-accelerated inference? - Zhihu (zhihu.com)
How To Run Inference Using TensorRT C++ API | LearnOpenCV
learnopencv/PyTorch-ONNX-TensorRT-CPP at master · spmallick/learnopencv (github.com)