在工程中有时也会用OpenCV进行深度学习推理。将OpenCV推理的数值结果与PyTorch等深度学习框架的结果进行对比时,数值往往会有一些差异,导致差异的一个重要因素就是数据预处理。OpenCV自带一个预处理函数blobFromImage,该函数的主要作用是:
- 图像预处理
- 将图像数据打包成blob类型
这里把对应的blobFromImages源码摘了出来,并与torchvision的处理方式做了对比:
- opencv里,crop为true时取宽、高两个方向缩放比例中较大的一个做等比例resize,再居中裁剪到目标尺寸;crop为false时直接resize到目标尺寸(不保持宽高比);torchvision的Resize(传入单个整数时)直接以最短边为基准,计算resize尺寸;
- opencv先减去均值,再尺度缩放,没有除以标准差的处理;torchvision先尺度缩放(ToTensor缩放到[0,1]),再减去均值,再除以标准差(Normalize);
// Packs a batch of images into a single 4-D blob laid out as NCHW.
//
// Per image, in this order:
//   1. resize to `size` (aspect-preserving resize + center crop when `crop`
//      is true, plain resize otherwise);
//   2. convert CV_8U pixels to CV_32F when a CV_32F blob is requested;
//   3. subtract `mean_`;
//   4. multiply by `scalefactor`.
// There is no division by a standard deviation. A typical torchvision
// pipeline (ToTensor + Normalize) instead scales first, then subtracts the
// mean, then divides by the standard deviation, which is one source of
// numerical differences between the two.
//
// images_      Batch of 2-D images; all must end up with the same size,
//              the same channel count (1, 3 or 4) and the blob's depth.
// blob_        Output blob of shape {N, C, H, W} and depth `ddepth`.
// scalefactor  Multiplier applied after mean subtraction.
// size         Target spatial size; an empty Size() means "use the size of
//              the first image".
// mean_        Per-channel value subtracted from every pixel.
// swapRB       Swap channel 0 and channel 2 in the output. mean_[0] and
//              mean_[2] are swapped as well, regardless of channel count.
// crop         Selects the resize strategy described in step 1.
// ddepth       CV_32F or CV_8U. With CV_8U the function only repacks data:
//              `scalefactor` must be 1.0 and `mean_` must be all zeros.
//
// Violations of the above are reported through CV_Assert / CV_Check*.
// The caller's image data is never modified.
void blobFromImages(InputArrayOfArrays images_, OutputArray blob_, double scalefactor,
                    Size size, const Scalar& mean_, bool swapRB, bool crop, int ddepth)
{
    CV_TRACE_FUNCTION();
    CV_CheckType(ddepth, ddepth == CV_32F || ddepth == CV_8U, "Blob depth should be CV_32F or CV_8U");
    // CV_8U output is supported, but only without scaling and mean subtraction.
    if (ddepth == CV_8U)
    {
        CV_CheckEQ(scalefactor, 1.0, "Scaling is not supported for CV_8U blob depth");
        CV_Assert(mean_ == Scalar() && "Mean subtraction is not supported for CV_8U blob depth");
    }

    std::vector<Mat> images;
    images_.getMatVector(images);
    CV_Assert(!images.empty());

    // The mean is identical for every image, so prepare it once.
    Scalar mean = mean_;
    if (swapRB)
        std::swap(mean[0], mean[2]);
    // Subtracting zero and multiplying by one leave pixels untouched, so the
    // arithmetic (and the defensive copy it requires) can be skipped.
    const bool changesPixels = !(mean == Scalar()) || scalefactor != 1.0;

    // Pre-process every image of the batch.
    for (size_t i = 0; i < images.size(); i++)
    {
        Mat& img = images[i];
        // Remember the caller's buffer so we can tell whether `img` still
        // aliases it after the optional resize / conversion below.
        const unsigned char* const callerData = img.data;

        const Size imgSize = img.size();
        if (size == Size())
            size = imgSize;
        if (size != imgSize)
        {
            if (crop)
            {
                // Scale by the larger of the two ratios so the resized image
                // covers the target in both dimensions, then center-crop.
                const float resizeFactor = std::max(size.width / (float)imgSize.width,
                                                    size.height / (float)imgSize.height);
                resize(img, img, Size(), resizeFactor, resizeFactor, INTER_LINEAR);
                const Rect cropRect(Point((img.cols - size.width) / 2,
                                          (img.rows - size.height) / 2),
                                    size);
                img = img(cropRect);
            }
            else
            {
                // Plain resize; the aspect ratio is not preserved.
                resize(img, img, size, 0, 0, INTER_LINEAR);
            }
        }

        if (img.depth() == CV_8U && ddepth == CV_32F)
            img.convertTo(img, CV_32F);

        if (changesPixels)
        {
            // Without a resize or conversion `img` still shares its buffer
            // with the caller's image; copy it so the input stays intact.
            if (img.data == callerData)
                img = img.clone();
            // Mean subtraction comes first, scaling second.
            img -= mean;
            img *= scalefactor;
        }
    }

    const size_t nimages = images.size();
    const Mat image0 = images[0];
    const int nch = image0.channels();
    CV_Assert(image0.dims == 2);

    if (nch == 3 || nch == 4)
    {
        // Allocate the 4-D blob: N x C x H x W.
        int sz[] = { (int)nimages, nch, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();
        Mat ch[4];

        for (size_t i = 0; i < nimages; i++)
        {
            const Mat& image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            // Every image must match the channel count the blob was sized
            // for; otherwise a plane outside the blob would be addressed.
            CV_Assert(image.dims == 2 && image.channels() == nch);
            CV_Assert(image.size() == image0.size());

            // Each ch[j] is a header over plane j of image i inside the blob.
            for (int j = 0; j < nch; j++)
                ch[j] = Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, j));
            if (swapRB)
                std::swap(ch[0], ch[2]);
            // Splitting the interleaved image into the plane headers writes
            // the data straight into the blob.
            split(image, ch);
        }
    }
    else
    {
        // Single-channel images.
        CV_Assert(nch == 1);
        int sz[] = { (int)nimages, 1, image0.rows, image0.cols };
        blob_.create(4, sz, ddepth);
        Mat blob = blob_.getMat();

        for (size_t i = 0; i < nimages; i++)
        {
            const Mat& image = images[i];
            CV_Assert(image.depth() == blob_.depth());
            CV_Assert(image.dims == 2 && image.channels() == 1);
            CV_Assert(image.size() == image0.size());

            // Copy the image into its plane of the blob.
            image.copyTo(Mat(image.rows, image.cols, ddepth, blob.ptr((int)i, 0)));
        }
    }
}