C++使用opencv4.0调用tensorflow训练的ssd_mobilenet_v1_coco_2017_11_17模型并进行物体识别

本文链接：https://blog.csdn.net/atpalain_csdn/article/details/100098720

C++使用opencv4.0调用tensorflow训练的ssd_mobilenet_v1_coco_2017_11_17模型并进行物体识别

安装所需软件/库
Step0. 确保已安装python或Anaconda3
Step1. 生成opencv可调用的pbtxt文件
Step2. 调用模型并识别物体

参考资料：

安装所需软件/库

opencv4.0.0.

Step0. 确保已安装python或Anaconda3

Step1. 生成opencv可调用的pbtxt文件

从网盘的 tensorflow\pb2pbtxt 目录中下载以下4个文件（点此链接）：

tf_text_graph_common.py
tf_text_graph_faster_rcnn.py
tf_text_graph_mask_rcnn.py
tf_text_graph_ssd.py

先别关闭网盘，后面还要下载其他文件，如果嫌麻烦，也可将所有文件都下载下来备用。
接着从网盘的 tensorflow\ssd_mobilenet_v1_coco_2017_11_17 目录中下载模型文件 frozen_inference_graph.pb 和配置文件 ssd_mobilenet_v1_coco.config。

在任意位置新建一个文件夹，可命名为 pb2pbtxt 。把刚才6个文件放入该文件夹中，如下图所示：

在这里插入图片描述
在文件夹空白处按住Shift+鼠标右键，选择 Open PowerShell window here，打开 PowerShell 命令窗口。

在这里插入图片描述

在命令窗口中输入：

python tf_text_graph_ssd.py --input frozen_inference_graph.pb --config ssd_mobilenet_v1_coco.config --output ssd_mobilenet_v1_coco_2017_11_17.pbtxt

在这里插入图片描述
在此目录下生成了 ssd_mobilenet_v1_coco_2017_11_17.pbtxt 文件，如下图所示：

在这里插入图片描述
该文件即为opencv可调用的 .pbtxt 文件，其部分内容如下图所示：

在这里插入图片描述

opencv可调用的pbtxt文件生成完毕！

也许有些人会觉得该步骤有些多余，这个 .pbtxt 文件完全可以从网上下载，不需要自己重新生成。确实没错，我们可以从 How to load Tensorflow models with OpenCV 这个网页中下载不同模型对应的 .pbtxt 文件，就像里面说到的，opencv社区已经帮我们完成了这项工作。

下图是该网页的部分内容：

上面的红框部分是刚才我们下载的4个 .py 文件；
weights 是模型的下载链接，其中包含了 .pb 模型文件；
config 则是已经生成好的，opencv可调用的 .pbtxt 文件。

在这里插入图片描述

所以，对于上面列出的模型，我们都可以直接去该网站下载，不需要自行生成。
这一步骤的作用在于，如果我们训练了自己的模型，又想用opencv的dnn来测试，那么我们就必须得自己去完成 .pbtxt 的生成工作。

Step2. 调用模型并识别物体

从网盘 tensorflow 目录中下载 object_detection_classes_coco.txt 文件，并从 tensorflow\test_images 目录中下载测试图片 image1.jpg（点此链接）。
新建vs工程，将以下4个文件复制到与 .cpp 同级的目录下：

frozen_inference_graph.pb;
ssd_mobilenet_v1_coco_2017_11_17.pbtxt;
object_detection_classes_coco.txt;
image1.jpg.

如下图所示：

在这里插入图片描述
物体识别测试程序代码如下：

#include "stdafx.h"
#include <fstream>
#include <sstream>

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

using namespace cv;
using namespace dnn;

// Minimum confidence a detection must exceed to be kept; assigned in main().
float confThreshold;
// Threshold passed to NMSBoxes() in postprocess(); assigned in main().
float nmsThreshold;
// Class names read line by line from the classes file in main().
std::vector<std::string> classes;

// Converts the network outputs into boxes, applies NMS and draws the survivors.
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net);
// Draws one detection (rectangle plus text label) onto the frame.
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);

int main(int argc, char** argv)
{
	// 根据选择的检测模型文件进行配置
	confThreshold = 0.5;
	nmsThreshold = 0.4;

	float scale = 1.0;
	Scalar mean = { 0, 0, 0 };
	bool swapRB = true;
	int inpWidth = 300;
	int inpHeight = 300;

	String modelPath = "frozen_inference_graph.pb";
	String configPath = "ssd_mobilenet_v1_coco_2017_11_17.pbtxt";
	String framework = "";

	int backendId = cv::dnn::DNN_BACKEND_OPENCV;
	int targetId = cv::dnn::DNN_TARGET_CPU;
	
	String classesFile = R"(object_detection_classes_coco.txt)";

	// Open file with classes names.
	if (!classesFile.empty()) {
		const std::string& file = classesFile;
		std::ifstream ifs(file.c_str());
		if (!ifs.is_open())
			CV_Error(Error::StsError, "File " + file + " not found");
		std::string line;
		while (std::getline(ifs, line)) {
			classes.push_back(line);
		}
	}

	// Load a model.
	Net net = readNet(modelPath, configPath, framework);
	net.setPreferableBackend(backendId);
	net.setPreferableTarget(targetId);

	std::vector<String> outNames = net.getUnconnectedOutLayersNames();

	// Create a window
	static const std::string kWinName = "Deep learning object detection in OpenCV";

	// Process frames.
	Mat frame, blob;
	frame = imread("image1.jpg");
	
	// Create a 4D blob from a frame.
	Size inpSize(inpWidth > 0 ? inpWidth : frame.cols,
		inpHeight > 0 ? inpHeight : frame.rows);
	blobFromImage(frame, blob, scale, inpSize, mean, swapRB, false);

	// Run a model.
	net.setInput(blob);
	if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
	{
		resize(frame, frame, inpSize);
		Mat imInfo = (Mat_<float>(1, 3) << inpSize.height, inpSize.width, 1.6f);
		net.setInput(imInfo, "im_info");
	}

	std::vector<Mat> outs;
	net.forward(outs, outNames);

	postprocess(frame, outs, net);

	// Put efficiency information.
	std::vector<double> layersTimes;
	double freq = getTickFrequency() / 1000;
	double t = net.getPerfProfile(layersTimes) / freq;
	std::string label = format("Inference time: %.2f ms", t);
	putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));

	imshow(kWinName, frame);
	waitKey(0);
	
	return 0;
}

/**
 * Converts raw network outputs into detections and draws them on the frame.
 *
 * Detections whose confidence exceeds confThreshold are collected, filtered
 * with NMSBoxes() using nmsThreshold, and the survivors are passed to
 * drawPred().
 *
 * @param frame image the detections are drawn on
 * @param outs  output blobs returned by net.forward()
 * @param net   network that produced outs; used to find the output layer type
 *
 * Raises a CV_Error if the output layer type is not handled here.
 */
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
{
	// Queried on every call: caching these in function-local statics would
	// keep the values of whichever net was passed on the first call.
	const std::vector<int> outLayers = net.getUnconnectedOutLayers();
	CV_Assert(!outLayers.empty());
	const std::string outLayerType = net.getLayer(outLayers[0])->type;

	std::vector<int> classIds;
	std::vector<float> confidences;
	std::vector<Rect> boxes;

	// Parses an output blob with a shape 1x1xNx7 where N is the number of
	// detections and every detection is a vector of values
	// [batchId, classId, confidence, left, top, right, bottom].
	// Box coordinates are multiplied by scaleX / scaleY before truncation.
	auto collectDetections = [&](float scaleX, float scaleY) {
		CV_Assert(outs.size() == 1);
		const float* data = outs[0].ptr<float>();
		const size_t total = outs[0].total();
		// Only whole 7-value records are read, never past the end of the blob.
		for (size_t i = 0; i + 7 <= total; i += 7) {
			const float confidence = data[i + 2];
			if (confidence > confThreshold) {
				const int left = static_cast<int>(data[i + 3] * scaleX);
				const int top = static_cast<int>(data[i + 4] * scaleY);
				const int right = static_cast<int>(data[i + 5] * scaleX);
				const int bottom = static_cast<int>(data[i + 6] * scaleY);
				const int width = right - left + 1;
				const int height = bottom - top + 1;
				classIds.push_back(static_cast<int>(data[i + 1]) - 1);  // Skip 0th background class id.
				boxes.push_back(Rect(left, top, width, height));
				confidences.push_back(confidence);
			}
		}
	};

	if (net.getLayer(0)->outputNameToIndex("im_info") != -1)  // Faster-RCNN or R-FCN
	{
		// Coordinates are used as they come out of the network.
		collectDetections(1.0f, 1.0f);
	}
	else if (outLayerType == "DetectionOutput") {
		// Coordinates are scaled by the frame size.
		collectDetections(static_cast<float>(frame.cols), static_cast<float>(frame.rows));
	}
	else if (outLayerType == "Region") {
		for (size_t i = 0; i < outs.size(); ++i) {
			// Each row is one candidate: [center_x, center_y, width, height]
			// followed by one value that is skipped here and then the
			// per-class scores, which start at column 5.
			const Mat& out = outs[i];
			for (int j = 0; j < out.rows; ++j) {
				const float* data = out.ptr<float>(j);
				Mat scores = out.row(j).colRange(5, out.cols);
				Point classIdPoint;
				double confidence;
				minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
				if (confidence > confThreshold) {
					const int centerX = static_cast<int>(data[0] * frame.cols);
					const int centerY = static_cast<int>(data[1] * frame.rows);
					const int width = static_cast<int>(data[2] * frame.cols);
					const int height = static_cast<int>(data[3] * frame.rows);
					const int left = centerX - width / 2;
					const int top = centerY - height / 2;

					classIds.push_back(classIdPoint.x);
					confidences.push_back(static_cast<float>(confidence));
					boxes.push_back(Rect(left, top, width, height));
				}
			}
		}
	}
	else
		CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + outLayerType);

	// Drop overlapping boxes and draw what is left.
	std::vector<int> indices;
	NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
	for (size_t i = 0; i < indices.size(); ++i) {
		const int idx = indices[i];
		const Rect& box = boxes[idx];
		drawPred(classIds[idx], confidences[idx], box.x, box.y,
			box.x + box.width, box.y + box.height, frame);
	}
}

/**
 * Draws one detection on the frame: a green rectangle plus a text label.
 *
 * The label is the confidence formatted with two decimals, prefixed with the
 * class name when the global classes list is not empty.
 *
 * @param classId index into the global classes list
 * @param conf    detection confidence
 * @param left    left edge of the box
 * @param top     top edge of the box
 * @param right   right edge of the box
 * @param bottom  bottom edge of the box
 * @param frame   image to draw on
 *
 * Fails a CV_Assert if classes is non-empty and classId is outside it.
 */
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
{
	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0));

	std::string label = format("%.2f", conf);
	if (!classes.empty()) {
		// Callers subtract 1 from the network class id, so a negative index
		// can arrive here; reject it instead of indexing out of bounds.
		CV_Assert(classId >= 0 && classId < static_cast<int>(classes.size()));
		label = classes[classId] + ": " + label;
	}

	int baseLine;
	Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

	// Keep the label below the top edge of the frame.
	top = max(top, labelSize.height);
	rectangle(frame, Point(left, top - labelSize.height),
		Point(left + labelSize.width, top + baseLine), Scalar::all(255), FILLED);
	putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar());
}

先配置好opencv，再运行以上程序，如无意外，将得到以下结果：

在这里插入图片描述