【OpenCV进阶】加载SSD/GoogLeNet网络，实现图像识别

嵌小超

已于 2023-10-29 02:54:06 修改

阅读量293

点赞数 1

分类专栏： # OpenCV 文章标签：机器学习 opencv 人工智能计算机视觉

于 2021-07-20 23:44:53 首次发布

本文链接：https://blog.csdn.net/weixin_51244852/article/details/118947247

版权

OpenCV 专栏收录该内容

62 篇文章 52 订阅

订阅专栏

📢：如果你也对机器人、人工智能感兴趣，看来我们志同道合✨
📢：不妨浏览一下我的博客主页【https://blog.csdn.net/weixin_51244852】
📢：文章若有幸对你有帮助，可点赞 👍 收藏 ⭐不迷路🙉
📢：内容若有错误，敬请留言 📝指正！原创文，转载请注明出处

加载SSD

SSD，全称为Single Shot MultiBox Detector，是一种单阶段的目标检测算法。其主要特点是在进行目标检测时，只需一次前向传播就可以完成所有的预测。

SSD算法的工作流程主要包括特征提取和预测输出两个步骤。在特征提取阶段，它使用卷积神经网络对输入图像进行处理，并在多个特征层上进行检测。这些特征层对应的是不同尺度的特征图，可以有效地捕获到不同大小、不同形状的目标物体。在预测输出阶段，它会在每个特征层上基于一系列默认的边界框（default bounding box）预测位置偏移和类别概率，然后通过非极大值抑制（NMS）操作来过滤掉冗余的预测结果。

此外，SSD算法的网络结构包含了基础网络（Base Network）、辅助卷积层（Auxiliary Convolutions）和预测卷积层（Prediction Convolutions）。其中，基础网络通常是一个预训练的深度卷积神经网络，如VGG或者ResNet；辅助卷积层用于提供不同尺度的特征图；预测卷积层则用于生成最终的预测结果。

全部代码

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

// Spatial size (in pixels) that each frame is resized to when the input blob
// is built in main().
const size_t width = 300;
const size_t height = 300;
// Preprocessing constants handed to blobFromImage() in main(): the mean that
// is subtracted and the multiplier applied afterwards (0.007843 ~= 1 / 127.5).
const float meanVal = 127.5;
const float scaleFactor = 0.007843f;
// Label table indexed by the class id read from column 1 of each detection
// row; entry 0 is the background class, so the table holds 21 names in total.
const char* classNames[] = { "background",
"aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair",
"cow", "diningtable", "dog", "horse",
"motorbike", "person", "pottedplant",
"sheep", "sofa", "train", "tvmonitor" };

// Locations of the model files on disk.
// NOTE(review): labelFile is not referenced anywhere in the visible code.
String labelFile = "D:/opencv-4.4.0/models/ssd/labelmap_det.txt";
String model_text_file = "D:/opencv-4.4.0/models/ssd/MobileNetSSD_deploy.prototxt";
String modelFile = "D:/opencv-4.4.0/models/ssd/MobileNetSSD_deploy.caffemodel";

int main(int argc, char** argv) {
	VideoCapture capture;
	capture.open(0);
	//namedWindow("input", WINDOW_AUTOSIZE);
	int w = capture.get(CAP_PROP_FRAME_WIDTH);
	int h = capture.get(CAP_PROP_FRAME_HEIGHT);
	printf("frame width : %d, frame height : %d", w, h);

	// set up net
	Net net = readNetFromCaffe(model_text_file, modelFile);
	net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
	net.setPreferableTarget(DNN_TARGET_CPU);

	Mat frame;
	while (capture.read(frame)) {
		flip(frame, frame, 1);
		//imshow("input", frame);

		//预测
		Mat inputblob = blobFromImage(frame, scaleFactor, Size(width, height), meanVal, false);
		net.setInput(inputblob, "data");
		Mat detection = net.forward("detection_out");

		//检测
		Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
		float confidence_threshold = 0.25;
		for (int i = 0; i < detectionMat.rows; i++) {
			float confidence = detectionMat.at<float>(i, 2);
			if (confidence > confidence_threshold) {
				size_t objIndex = (size_t)(detectionMat.at<float>(i, 1));
				float tl_x = detectionMat.at<float>(i, 3) * frame.cols;
				float tl_y = detectionMat.at<float>(i, 4) * frame.rows;
				float br_x = detectionMat.at<float>(i, 5) * frame.cols;
				float br_y = detectionMat.at<float>(i, 6) * frame.rows;

				Rect object_box((int)tl_x, (int)tl_y, (int)(br_x - tl_x), (int)(br_y - tl_y));
				rectangle(frame, object_box, Scalar(0, 0, 255), 2, 8, 0);
				putText(frame, format("%s:%.2f", classNames[objIndex], confidence), Point(tl_x, tl_y), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(255, 0, 0), 2);
			}
		}
		vector < double>layerstimings;
		double freq = getTickFrequency() / 1000;
		double time = net.getPerfProfile(layerstimings) / freq;
		ostringstream ss;
		ss << "FPS" << 1000 / time << ";time:" << time << "ms";
		putText(frame, ss.str(), Point(20, 20), FONT_HERSHEY_PLAIN, 1, Scalar(0, 0, 255), 2, 8);
		imshow("ssd-video-demo", frame);
		char c = waitKey(5);
		if (c == 27) { // ESC退出
			break;
		}
	}
	capture.release();//释放资源
	waitKey(0);
	return 0;
}

加载GoogLeNet

对模型的训练是深度学习中非常重要的部分，模型训练完成后，可以使用模型对新的数据进行处理，比如检测物体。
但是模型训练需要耗费大量的时间、数据，以及较高配置的设备，不妨直接使用已经训练好的模型，避免重复造轮子。
因此，这里直接使用预训练好的模型进行快速识别分类。

GoogLeNet（有时也被误写作GooglNet）是由Google在2014年推出的深度学习神经网络模型。该模型基于Inception模块构建，是深度学习技术发展过程中的一个重要节点。

GoogLeNet的核心创新是提出了Inception模块，这种结构允许网络在不同尺度上并行处理信息，从而更有效地提取和组合特征。具体来说，Inception模块的基本结构包括使用1x1的卷积进行升降维操作，以及在多个尺寸上同时进行卷积再聚合。

此外，GoogLeNet还在2014年的ImageNet竞赛中夺得了冠军，从那时起，其基础架构经过不断的改进和发展，形成了如Inception V2、Inception V3、Inception V4等多个版本。这些版本的提出都是为了解决原始模型中的一些问题，并进一步提高模型的性能和效率。

全部代码

#include <opencv2/opencv.hpp>
#include<opencv2/dnn.hpp>
#include <iostream>
#include <fstream>

using namespace cv;
using namespace cv::dnn;
using namespace std;

vector<string> readLabels();
int main(int argc, char** argv) {
// 加载googlenet网络模型
	string bin_model = "D:/opencv-4.1.0/models/googlenet/bvlc_googlenet.caffemodel";//模型文件名称
	string protxt = "D:/opencv-4.1.0/models/googlenet/bvlc_googlenet.prototxt";//配置文件名称
	Net net = readNetFromCaffe(protxt, bin_model);

	// 设置计算后台
	net.setPreferableBackend(DNN_BACKEND_OPENCV);//opencv作为计算后台
	//net.setPreferableTarget(DNN_TARGET_CPU);//cpu作为加速计算的后台
	net.setPreferableTarget(DNN_TARGET_MYRIAD);//加速棒作为加速计算的后台

	// 获取卷积神经网络各层的信息
	/*vector<string> layer_names = net.getLayerNames();
	for (int i = 0; i < layer_names.size(); i++) {
		int id = net.getLayerId(layer_names[i]);
		auto layer = net.getLayer(id);
		printf("layer id : %d, type : %s, name : %s \n", id, layer->type.c_str(), layer->name.c_str());
	}*/

	Mat src = imread("D:/images/xiaomaolu.jpg");
	imshow("input", src);
	vector<string> names = readLabels();

	// 构建输入
	int w = 224;
	int h = 224;
	Mat intpuBlob = blobFromImage(src, 1.0, Size(w, h), Scalar(117.0, 117.0, 117.0), true, false);

	// 设置输入
	net.setInput(intpuBlob);
	// 推断
	Mat probMat = net.forward();

	//解析数据
	Mat prob = probMat.reshape(1, 1);
	Point classNum;
	double classProb;
	minMaxLoc(prob, NULL, &classProb, NULL, &classNum);
	int index = classNum.x;
	printf("\n current index = %d, possible : %.2f , name : %s\n", index, classProb, names[index].c_str());
	putText(src, names[index].c_str(), Point(50, 50), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 0, 255), 2, 8);
	imshow("result", src);
	waitKey(0);
	return 0;
}

/**
 * Reads the ImageNet class labels, one per line, from the label file that
 * ships with the GoogLeNet model.
 *
 * Blank lines are skipped and a trailing carriage return (CRLF files read on
 * a platform that does not translate line endings) is stripped from each
 * label.
 *
 * @return The labels in file order. The process exits with status -1 when
 *         the file cannot be opened.
 */
std::vector<std::string> readLabels() {
	const std::string label_map_txt = "D:/opencv-4.1.0/models/googlenet/classification_classes_ILSVRC2012.txt";
	std::ifstream fp(label_map_txt);
	if (!fp.is_open()) {
		printf("could not find the file \n");
		exit(-1);
	}

	std::vector<std::string> classNames;
	std::string name;
	// Loop on the read itself rather than on eof(): a stream that fails
	// without reaching end-of-file would otherwise never leave the loop.
	while (std::getline(fp, name)) {
		if (!name.empty() && name.back() == '\r') {
			name.pop_back();
		}
		if (!name.empty()) {
			classNames.push_back(name);
		}
	}
	return classNames;
}

效果展示

在这里插入图片描述

嵌小超

关注

1
点赞
踩
1

收藏

觉得还不错? 一键收藏
打赏
0
评论
【OpenCV进阶】加载SSD\GooglNet网络，实现图像识别

全部代码#include <opencv2/opencv.hpp>#include<opencv2/dnn.hpp>#include <iostream>#include <fstream>using namespace cv;using namespace cv::dnn;using namespace std;vector<string> readLabels();int main(int argc, char** arg
复制链接

扫一扫