OpenCV DNN单张与多张图像的推断

转载请注明作者和出处: http://blog.csdn.net/john_bh/

1. blobFromImage与blobFromImages

OpenCV DNN既支持单张图像推断,也支持分批次(batch)的多张图像推断,对应的两个API分别为blobFromImage与blobFromImages。它们返回的都是一个四维的Mat对象,各维度按顺序为NCHW,含义如下:

  • N表示图像的数量(即batch大小),单张图像时N为1 ;
  • C表示输入图像的通道数目 ;
  • H表示输入图像的高度 ;
  • W表示输入图像的宽度。
// Builds a network input blob from a single image. Per the article text the
// returned Mat is four-dimensional, laid out in NCHW order.
//   image       - the single input image
//   scalefactor - scaling applied to the image (default 1.0)
//   size        - image size (default Size())
//   mean        - mean value (default Scalar())
//   swapRB      - whether to swap channels (default false)
//   crop        - whether to crop (default false)
//   ddepth      - output type; defaults to CV_32F (floating point)
Mat cv::dnn::blobFromImage(
	InputArray 	image,
	double 	scalefactor = 1.0,
	const Size & 	size = Size(),
	const Scalar & 	mean = Scalar(),
	bool 	swapRB = false,
	bool 	crop = false,
	int 	ddepth = CV_32F
)

// Builds a network input blob from several images at once (batched input).
// Per the article text the returned Mat is four-dimensional, laid out in
// NCHW order, where N covers the multiple images.
//   images      - the input images
//   scalefactor - scaling applied to the images (default 1.0)
//   size        - image size (default Size())
//   mean        - mean value (default Scalar())
//   swapRB      - whether to swap channels (default false)
//   crop        - whether to crop (default false)
//   ddepth      - output type; defaults to CV_32F (floating point)
Mat cv::dnn::blobFromImages(
	InputArrayOfArrays 	images,
	double 	scalefactor = 1.0,
	Size 	size = Size(),
	const Scalar & 	mean = Scalar(),
	bool 	swapRB = false,
	bool 	crop = false,
	int 	ddepth = CV_32F 
)

参数解释:

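  • images / image:images表示多张图像(blobFromImages),image表示单张图像(blobFromImage) ;
  • scalefactor:表示对像素值的缩放系数,默认为1.0 ;
  • size:表示输出blob中图像的大小(宽和高) ;
  • mean:表示从各通道中减去的均值 ;
  • swapRB:是否交换R与B通道 ;
  • crop:调整大小后是否对图像进行剪切 ;
  • ddepth:输出blob的数据类型,默认是浮点数格式(CV_32F)。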

2. 代码实现

2.1 C++

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
/******************************************************
*
********************************************************/
using namespace cv;
using namespace cv::dnn;
using namespace std;


String bin_model = "D:/projects/opencv_tutorial/data/models/googlenet/bvlc_googlenet.caffemodel";
String protxt = "D:/projects/opencv_tutorial/data/models/googlenet/bvlc_googlenet.prototxt";
String labels_txt_file = "D:/vcworkspaces/classification_classes_ILSVRC2012.txt";
vector<String> readClassNames();
int main(int argc, char** argv) {
	Mat image1 = imread("D:/images/cat.jpg");
	Mat image2 = imread("D:/images/aeroplane.jpg");
	vector<Mat> images;
	images.push_back(image1);
	images.push_back(image2);
	vector<String> labels = readClassNames();

	int w = 224;
	int h = 224;

	// 加载网络
	Net net = readNetFromCaffe(protxt, bin_model);
	net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
	net.setPreferableTarget(DNN_TARGET_CPU);

	if (net.empty()) {
		printf("read caffe model data failure...\n");
		return -1;
	}
	Mat inputBlob = blobFromImages(images, 1.0, Size(w, h), Scalar(104, 117, 123), false, false);

	// 执行图像分类
	Mat prob;
	net.setInput(inputBlob);
	prob = net.forward();
	vector<double> times;
	double time = net.getPerfProfile(times);
	float ms = (time * 1000) / getTickFrequency();
	printf("current inference time : %.2f ms \n", ms);

	// 得到最可能分类输出
	for (int n = 0; n < prob.rows; n++) {
		Point classNumber;
		double classProb;
		Mat probMat = prob(Rect(0, n, 1000, 1)).clone();
		Mat result = probMat.reshape(1, 1);
		minMaxLoc(result, NULL, &classProb, NULL, &classNumber);
		int classidx = classNumber.x;
		printf("\n current image classification : %s, possible : %.2f\n", labels.at(classidx).c_str(), classProb);

		// 显示文本
		putText(images[n], labels.at(classidx), Point(20, 50), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 0, 255), 2, 8);
		imshow("Image Classification", images[n]);
		waitKey(0);

	}
	return 0;
}

/**
 * Reads the class names from the file named by labels_txt_file, one name per
 * line. Empty lines are skipped and a trailing carriage return (CRLF files)
 * is removed from each line.
 *
 * Prints a message and terminates the process with exit(-1) when the file
 * cannot be opened.
 *
 * @return the non-empty lines of the file, in file order
 */
std::vector<String> readClassNames()
{
	std::vector<String> classNames;

	std::ifstream fp(labels_txt_file);
	if (!fp.is_open())
	{
		printf("could not open file...\n");
		exit(-1);
	}
	std::string name;
	// Loop on the extraction itself: testing eof() before reading would spin
	// forever if the stream entered a failed state without reaching EOF.
	while (std::getline(fp, name))
	{
		// A CRLF file read on a platform that does not translate line endings
		// would otherwise leave '\r' at the end of every label.
		if (!name.empty() && name.back() == '\r')
			name.pop_back();
		if (!name.empty())
			classNames.push_back(name);
	}
	// fp is closed by its destructor.
	return classNames;
}

2.2 Python

"""
Single-image and multi-image (batched) inference with the OpenCV DNN module.

Loads the GoogLeNet Caffe model, packs two images into one NCHW blob with
cv.dnn.blobFromImages, runs a single forward pass and shows each image with
its top-scoring class and the inference time drawn on it.
"""

import cv2 as cv
import numpy as np

bin_model = "bvlc_googlenet.caffemodel"
protxt = "bvlc_googlenet.prototxt"

# Load names of classes (one per line).
classes = None
with open("classification_classes_ILSVRC2012.txt", 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

# Load the CNN model.
net = cv.dnn.readNetFromCaffe(protxt, bin_model)

# Read the input images; cv.imread returns None instead of raising when a
# file is missing or unreadable, so check explicitly.
image_paths = ["images/dog.jpg", "images/airplane.jpg"]
images = []
for path in image_paths:
    image = cv.imread(path)
    if image is None:
        raise FileNotFoundError("could not read image: %s" % path)
    images.append(image)

# Pass the list itself: images of different sizes cannot be stacked into one
# regular NumPy array, and blobFromImages resizes each image on its own.
blobs = cv.dnn.blobFromImages(images, 1.0, (224, 224), (104, 117, 123), False, crop=False)
print(blobs.shape)

# Run the model on the whole batch.
net.setInput(blobs)
out = net.forward()
# Put efficiency information.
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
print(out.shape)

# The window only needs to be created once, not once per image.
cv.namedWindow("googlenet-demo", cv.WINDOW_NORMAL)

# Get the class with the highest score for every image of the batch.
for i in range(min(len(out), len(images))):
    scores = out[i].flatten()
    classId = int(np.argmax(scores))
    confidence = scores[classId]
    cv.putText(images[i], label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))

    # Print predicted class; fall back to the numeric id when the label file
    # is missing entries for this class.
    if classes and classId < len(classes):
        class_name = classes[classId]
    else:
        class_name = 'Class #%d' % classId
    text_label = '%s: %.4f' % (class_name, confidence)
    cv.putText(images[i], text_label, (50, 50), cv.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
    cv.imshow("googlenet-demo", images[i])
    cv.waitKey(0)
cv.destroyAllWindows()

3. 结果展示

在这里插入图片描述
在这里插入图片描述

参考链接:
https://fanfuhan.github.io/2019/05/29/opencv-132/

评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值