转载请注明作者和出处: http://blog.csdn.net/john_bh/
1. blobFromImage与blobFromImages
OpenCV DNN既支持单张图像推断,也支持按批次对多张图像进行推断,对应的两个API分别为blobFromImage与blobFromImages。它们返回的都是一个四维的Mat对象,维度顺序为NCHW,各维度含义如下:
- N表示一个批次中图像的数量 ;
- C表示接受输入图像的通道数目 ;
- H表示接受输入图像的高度 ;
- W表示接受输入图像的宽度。
// Builds a 4-dimensional blob (NCHW order) from a single image.
//   image       - the single input image
//   scalefactor - scaling applied to the image (default 1.0)
//   size        - image size
//   mean        - mean value
//   swapRB      - whether to swap channels (default false)
//   crop        - whether to crop (default false)
//   ddepth      - output type; defaults to floating point (CV_32F)
Mat cv::dnn::blobFromImage(
InputArray image,
double scalefactor = 1.0,
const Size & size = Size(),
const Scalar & mean = Scalar(),
bool swapRB = false,
bool crop = false,
int ddepth = CV_32F
)
// Builds a 4-dimensional blob (NCHW order) from several images at once.
//   images      - the input images (one blob entry per image)
//   scalefactor - scaling applied to the images (default 1.0)
//   size        - image size
//   mean        - mean value
//   swapRB      - whether to swap channels (default false)
//   crop        - whether to crop (default false)
//   ddepth      - output type; defaults to floating point (CV_32F)
Mat cv::dnn::blobFromImages(
InputArrayOfArrays images,
double scalefactor = 1.0,
Size size = Size(),
const Scalar & mean = Scalar(),
bool swapRB = false,
bool crop = false,
int ddepth = CV_32F
)
参数解释:
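- images:表示多张图像(用于blobFromImages),image表示单张图像(用于blobFromImage) ;
- scalefactor:表示对像素值进行放缩的系数,默认为1.0 ;
- size:表示输出图像的大小(宽, 高),即网络输入所要求的尺寸 ;
- mean:表示各通道需要减去的均值;
- swapRB:是否交换R通道与B通道 ;
- crop:缩放之后是否对图像进行剪切 ;
- ddepth: 输出blob的数据类型,默认是浮点数格式(CV_32F)。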
2. 代码实现
2.1 C++
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
/******************************************************
* Batch image classification demo for the OpenCV DNN module:
* several images are packed into one blob with blobFromImages
* and classified by a Caffe network in a single forward pass.
********************************************************/
using namespace cv;
using namespace cv::dnn;
using namespace std;
// Caffe weights file handed to readNetFromCaffe (hard-coded local path).
String bin_model = "D:/projects/opencv_tutorial/data/models/googlenet/bvlc_googlenet.caffemodel";
// Caffe network definition (prototxt) handed to readNetFromCaffe.
String protxt = "D:/projects/opencv_tutorial/data/models/googlenet/bvlc_googlenet.prototxt";
// Text file of class names opened by readClassNames(); one name per line.
String labels_txt_file = "D:/vcworkspaces/classification_classes_ILSVRC2012.txt";
// Reads the class names from labels_txt_file; defined after main().
vector<String> readClassNames();
int main(int argc, char** argv) {
Mat image1 = imread("D:/images/cat.jpg");
Mat image2 = imread("D:/images/aeroplane.jpg");
vector<Mat> images;
images.push_back(image1);
images.push_back(image2);
vector<String> labels = readClassNames();
int w = 224;
int h = 224;
// 加载网络
Net net = readNetFromCaffe(protxt, bin_model);
net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(DNN_TARGET_CPU);
if (net.empty()) {
printf("read caffe model data failure...\n");
return -1;
}
Mat inputBlob = blobFromImages(images, 1.0, Size(w, h), Scalar(104, 117, 123), false, false);
// 执行图像分类
Mat prob;
net.setInput(inputBlob);
prob = net.forward();
vector<double> times;
double time = net.getPerfProfile(times);
float ms = (time * 1000) / getTickFrequency();
printf("current inference time : %.2f ms \n", ms);
// 得到最可能分类输出
for (int n = 0; n < prob.rows; n++) {
Point classNumber;
double classProb;
Mat probMat = prob(Rect(0, n, 1000, 1)).clone();
Mat result = probMat.reshape(1, 1);
minMaxLoc(result, NULL, &classProb, NULL, &classNumber);
int classidx = classNumber.x;
printf("\n current image classification : %s, possible : %.2f\n", labels.at(classidx).c_str(), classProb);
// 显示文本
putText(images[n], labels.at(classidx), Point(20, 50), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 0, 255), 2, 8);
imshow("Image Classification", images[n]);
waitKey(0);
}
return 0;
}
/**
 * Reads the class names from the file named by the global `labels_txt_file`.
 *
 * Each non-empty line becomes one entry, in file order. Empty lines are
 * skipped. A trailing carriage return (CRLF line endings) is stripped so it
 * does not end up in the class name.
 *
 * If the file cannot be opened, a message is printed and the process exits
 * with status -1.
 *
 * @return the class names, one per non-empty line.
 */
std::vector<String> readClassNames()
{
    std::ifstream fp(labels_txt_file);
    if (!fp.is_open())
    {
        printf("could not open file...\n");
        exit(-1);
    }

    std::vector<String> classNames;
    std::string name;
    // Loop on the result of getline rather than on eof(): the loop then also
    // terminates when the stream fails for a reason other than end-of-file.
    while (std::getline(fp, name))
    {
        if (!name.empty() && name.back() == '\r')
            name.pop_back();
        if (!name.empty())
            classNames.push_back(name);
    }
    // The ifstream closes itself when it goes out of scope.
    return classNames;
}
2.2 Python
"""
DNN单张与多张图像的推断
"""
import cv2 as cv
import numpy as np
# Paths of the Caffe weights and network definition, relative to the working directory.
bin_model = "bvlc_googlenet.caffemodel"
protxt = "bvlc_googlenet.prototxt"

# Load names of classes (one name per line).
classes = None
with open("classification_classes_ILSVRC2012.txt", 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# load CNN model
net = cv.dnn.readNetFromCaffe(protxt, bin_model)

# read input data
image1 = cv.imread("images/dog.jpg")
image2 = cv.imread("images/airplane.jpg")
images = []
images.append(image1)
images.append(image2)
# cv.imread returns None instead of raising when a file cannot be read.
for path, image in zip(("images/dog.jpg", "images/airplane.jpg"), images):
    if image is None:
        raise FileNotFoundError("could not read image: %s" % path)

# Pass the list itself: the images may have different sizes, so they cannot
# be stacked into one regular NumPy array. blobFromImages resizes each image
# to 224x224 and subtracts the per-channel mean; no channel swap, no crop.
blobs = cv.dnn.blobFromImages(images, 1.0, (224, 224), (104, 117, 123), False, crop=False)
print(blobs.shape)

# Run a model
net.setInput(blobs)
out = net.forward()

# Put efficiency information.
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
print(out.shape)

# Get a class with a highest score, one output row per input image.
for i in range(len(out)):
    classId = int(np.argmax(out[i]))
    confidence = out[i][classId]
    cv.putText(images[i], label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))

    # Print predicted class; fall back to the numeric id when no name is available.
    if classes and classId < len(classes):
        class_name = classes[classId]
    else:
        class_name = 'Class #%d' % classId
    text_label = '%s: %.4f' % (class_name, confidence)
    cv.putText(images[i], text_label, (50, 50), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)

    # Show each annotated image until a key is pressed.
    cv.namedWindow("googlenet-demo", cv.WINDOW_NORMAL)
    cv.imshow("googlenet-demo", images[i])
    cv.waitKey(0)
cv.destroyAllWindows()
3.结果展示