(一)openvino安装说明及yolov5的onnx生成详见:
【pytorch】将yolov5模型通过openVINO2022部署至生产环境(一):python版本+异步模式
(二)c++版本代码:
使用2种图片输入读取方式;
读取.onnx模型或XML和bin文件;
yolov5_6vino.h:
#ifndef YOLOV5VINO_H
#define YOLOV5VINO_H
#include <fstream>
#include <opencv2/opencv.hpp>
#include <inference_engine.hpp>
#define NOT_NCS2
using namespace cv;
using namespace dnn;
using namespace std;
using namespace InferenceEngine;
class YOLOVINO
{
public:
struct Detection
{
int class_id;
float confidence;
Rect box;
};
public:
YOLOVINO();
~YOLOVINO();
void init();
void loadNet(bool is_cuda);
Mat formatYolov5(const Mat& source);
void detect(Mat& image, vector<Detection>& outputs);
void drawRect(Mat& image, vector<Detection>& outputs);
void loadClassList();
private:
//每个类的类别得分
float m_scoreThreshold = 0.4;
//大于该值被认为是同一个框:
float m_nmsThreshold = 0.7;
//每个框含有物体的概率
float m_confThreshold = 0.6;
//"CPU","GPU","MYRIAD"
#ifdef NCS2
const std::string m_deviceName = "MYRIAD";
const std::string m_modelFilename = "configFiles/yolov5sNCS2.xml";
#else
//plugins.xml中的定义值:
const std::string m_deviceName = "CPU";// "HETERO";
const std::string m_modelFilename = "yolov5s.onnx";
#endif // NCS2
const std::string m_classfile = "classes.txt";
size_t m_numChannels = 0;
size_t m_inputH = 0;
size_t m_inputW = 0;
size_t m_imageSize = 0;
std::string m_inputName = "";
std::string m_outputName = "";
vector<std::string> m_classNames;
const vector<Scalar> colors = { Scalar(255, 255, 0), Scalar(0, 255, 0), Scalar(0, 255, 255), Scalar(255, 0, 0) };
InferRequest::Ptr m_inferRequest;
Blob::Ptr m_inputData;
};
#endif
yolov5_6vino.cpp:
#include "yolov5_6vino.h"
#include "samples/ocv_common.hpp"
//将OpenCV Mat对象中的图像数据传给为InferenceEngine Blob对象
void frameToBlob(const cv::Mat& frame, InferRequest::Ptr& inferRequest, const std::string& inputName) {
/* 从OpenCV Mat对象中拷贝图像数据到InferenceEngine 输入Blob对象 */
Blob::Ptr frameBlob = inferRequest->GetBlob(inputName);
//这个函数方便将8U 0-255转为Blob
matU8ToBlob<uint8_t>(frame, frameBlob); //uint8_t
}
void frameToBlob2(const cv::Mat& frame, InferRequest::Ptr& inferRequest, const std::string& inputName) {
/* 从OpenCV Mat对象中拷贝图像数据到InferenceEngine 输入Blob对象 */
Blob::Ptr frameBlob = inferRequest->GetBlob(inputName);
//为适应yolo特性,将输入值除以255的新函数
matU8ToBlob2<float>(frame, frameBlob); //uint8_t
}
YOLOVINO::YOLOVINO()
{
init();
}
YOLOVINO::~YOLOVINO()
{
}
void YOLOVINO::init()
{
InferenceEngine::Core ie;
//两种加载形式,直接加载onnx,或者加载
// 下面这种是直接加载onnx:
//InferenceEngine::CNNNetwork network = ie.ReadNetwork(m_modelFilename);
InferenceEngine::CNNNetwork network = ie.ReadNetwork("exported_onnx_model.XML", "exported_onnx_model.bin");
//输入名及输入指针的数组
InferenceEngine::InputsDataMap inputs = network.getInputsInfo();
InferenceEngine::OutputsDataMap outputs = network.getOutputsInfo();
for (auto item : inputs)
{
m_inputName = item.first;
auto input_data = item.second;
input_data->setPrecision(Precision::FP32);
//默认布局与维度有关: C - for 1 - dimensional, NC - for 2 - dimensional, CHW - for 3 - dimensional, NCHW - for 4 - dimensional NCDHW - for 5 - dimensional
//这里其实无需设置
input_data->setLayout(Layout::NCHW);
//内建输入预处理,设置以应对RGB格式的输入
input_data->getPreProcess().setColorFormat(ColorFormat::RGB);
//std::cout << "input name = " << m_inputName << std::endl;
}
//输出所有输出名:
for (auto item : outputs)
{
auto output_data = item.second;
output_data->setPrecision(Precision::FP32);
m_outputName = item.first;
std::cout << "output name = " << m_outputName << std::endl;
}
//加载网络为可执行网络
auto executable_network = ie.LoadNetwork(network, m_deviceName);
m_inferRequest = executable_network.CreateInferRequestPtr();
m_inputData = m_inferRequest->GetBlob(m_inputName);
m_numChannels = m_inputData->getTensorDesc().getDims()[1];
m_inputH = m_inputData->getTensorDesc().getDims()[2];
m_inputW = m_inputData->getTensorDesc().getDims()[3];
//m_imageSize = m_inputH * m_inputW;
loadClassList();
}
//该函数将vetor中的数据按行读取
void YOLOVINO::loadClassList()
{
std::ifstream ifs(m_classfile);
std::string line;
while (getline(ifs, line))
m_classNames.push_back(line);
}
//该函数取图像最大边长作新正方形,多出部分补0
Mat YOLOVINO::formatYolov5(const Mat& source)
{
int col = source.cols;
int row = source.rows;
int max = MAX(col, row);
Mat result = Mat::zeros(max, max, CV_8UC3);
source.copyTo(result(Rect(0, 0, col, row)));
return result;
}
void YOLOVINO::detect(Mat& image, vector<Detection>& outputs)
{
cv::Mat input_image = formatYolov5(image);
cv::Mat blob_image;
//cv::resize(input_image, blob_image, cv::Size(m_inputW, m_inputH));
cvtColor(input_image, blob_image, COLOR_BGR2RGB);
//输入图像转换的两种方法:
//法1:借助安装用例自带的辅助文件samples/ocv_common.hpp
//frameToBlob(blob_image, m_inferRequest, m_inputName);
frameToBlob2(blob_image, m_inferRequest, m_inputName);
//法2:要先取输入的格式参数:
// float* data = static_cast<float*>(m_inputData->buffer());
// for (size_t row = 0; row < m_inputH; row++) {
// for (size_t col = 0; col < m_inputW; col++) {
// for (size_t ch = 0; ch < m_numChannels; ch++) {
//#ifdef NCS2
// data[m_imageSize * ch + row * m_inputW + col] = float(blob_image.at<cv::Vec3b>(row, col)[ch]);
//#else
// data[m_imageSize * ch + row * m_inputW + col] = float(blob_image.at<cv::Vec3b>(row, col)[ch] / 255.0);
//#endif // NCS2
// }
// }
// }
auto start = std::chrono::high_resolution_clock::now();
m_inferRequest->Infer();
auto output = m_inferRequest->GetBlob(m_outputName);
const float* detection_out =output->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
//维度信息
const SizeVector outputDims = output->getTensorDesc().getDims();//1,6300[25200],9
auto end = std::chrono::high_resolution_clock::now();
auto time = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
cout << "time = " << time << endl;
float x_factor = float(input_image.cols / m_inputW);
float y_factor = float(input_image.rows / m_inputH);
float* dataout = (float*)detection_out;
const int dimensions = outputDims[2];
const int rows = outputDims[1];
vector<int> class_ids;
vector<float> confidences;
vector<Rect> boxes;
//解析顺序,维度为1的可直接略掉。
//25200个9依次排列:
for (int i = 0; i < rows; ++i)
{
float confidence = dataout[4];
if (confidence >= m_confThreshold)
{
float* classes_scores = dataout + 5;
Mat scores(1, m_classNames.size(), CV_32FC1, classes_scores);
Point class_id;
double max_class_score;
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
if (max_class_score > m_scoreThreshold)
{
confidences.push_back(confidence);
class_ids.push_back(class_id.x);
float x = dataout[0];
float y = dataout[1];
float w = dataout[2];
float h = dataout[3];
int left = int((x - 0.5 * w) * x_factor);
int top = int((y - 0.5 * h) * y_factor);
int width = int(w * x_factor);
int height = int(h * y_factor);
boxes.push_back(Rect(left, top, width, height));
}
}
dataout += dimensions;
}
vector<int> nms_result;
NMSBoxes(boxes, confidences, m_scoreThreshold, m_nmsThreshold, nms_result);
for (int i = 0; i < nms_result.size(); i++)
{
int idx = nms_result[i];
Detection result;
result.class_id = class_ids[idx];
result.confidence = confidences[idx];
result.box = boxes[idx];
outputs.push_back(result);
}
}
//绘制box框
void YOLOVINO::drawRect(Mat& image, vector<Detection>& outputs)
{
int detections = outputs.size();
for (int i = 0; i < detections; ++i)
{
auto detection = outputs[i];
auto box = detection.box;
auto classId = detection.class_id;
const auto color = colors[classId % colors.size()];
rectangle(image, box, color, 3);
rectangle(image, Point(box.x, box.y - 40), Point(box.x + box.width, box.y), color, FILLED);
putText(image, m_classNames[classId].c_str(), Point(box.x, box.y - 5), FONT_HERSHEY_SIMPLEX, 1.5, Scalar(0, 0, 0), 2);
}
}
main.cpp:
#include "inference_engine.hpp"
#include "opencv2/opencv.hpp"
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include "yolov5_6vino.h"
using namespace std;
using namespace cv;
using namespace dnn;
using namespace InferenceEngine;
int main(int argc, char** argv)
{
YOLOVINO yolov5vino;
int frameCount = 0;
int fps = 0;
Mat frame;
auto start = std::chrono::high_resolution_clock::now();
VideoCapture capture("sample.mp4");
while (1)
{
capture.read(frame);
if (frame.empty())
{
break;
}
++frameCount;
//灰度图像也按RGB处理
if (frame.channels() < 3)
cvtColor(frame, frame, COLOR_GRAY2RGB);
std::vector<YOLOVINO::Detection> outputs;
yolov5vino.detect(frame, outputs);
yolov5vino.drawRect(frame, outputs);
if (frameCount >= 15)
{
auto end = std::chrono::high_resolution_clock::now();
//计算fps:
fps = frameCount * 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
frameCount = 0;
start = std::chrono::high_resolution_clock::now();
}
if (fps > 0)
{
std::ostringstream fps_label;
fps_label << std::fixed << std::setprecision(2);
fps_label << "FPS: " << fps;
std::string fps_label_str = fps_label.str();
cv::putText(frame, fps_label_str.c_str(), cv::Point(20, 50), cv::FONT_HERSHEY_SIMPLEX, 2, cv::Scalar(0, 0, 255), 2);
}
cv::namedWindow("output", cv::WINDOW_NORMAL);
cv::imshow("output", frame);
if (cv::waitKey(1) != -1)
{
capture.release();
std::cout << "finished by user\n";
break;
}
}
return 1;
}
yolo的输入需要在原图片基础上除以255,更新后的辅助函数文件为:
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
/**
* @brief a header file with common samples functionality using OpenCV
* @file ocv_common.hpp
*/
#pragma once
#include <opencv2/opencv.hpp>
#include "openvino/openvino.hpp"
#include "samples/common.hpp"
/**
* @brief Sets image data stored in cv::Mat object to a given Blob object.
* @param orig_image - given cv::Mat object with an image data.
* @param blob - Blob object which to be filled by an image data.
* @param batchIndex - batch index of an image inside of the blob.
*/
template <typename T>
void matU8ToBlob(const cv::Mat& orig_image, InferenceEngine::Blob::Ptr& blob, int batchIndex = 0) {
InferenceEngine::SizeVector blobSize = blob->getTensorDesc().getDims();
const size_t width = blobSize[3];
const size_t height = blobSize[2];
const size_t channels = blobSize[1];
InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
if (!mblob) {
IE_THROW() << "We expect blob to be inherited from MemoryBlob in matU8ToBlob, "
<< "but by fact we were not able to cast inputBlob to MemoryBlob";
}
// locked memory holder should be alive all time while access to its buffer happens
auto mblobHolder = mblob->wmap();
T* blob_data = mblobHolder.as<T*>();
cv::Mat resized_image(orig_image);
if (static_cast<int>(width) != orig_image.size().width || static_cast<int>(height) != orig_image.size().height) {
cv::resize(orig_image, resized_image, cv::Size(width, height));
}
int batchOffset = batchIndex * width * height * channels;
for (size_t c = 0; c < channels; c++) {
for (size_t h = 0; h < height; h++) {
for (size_t w = 0; w < width; w++) {
blob_data[batchOffset + c * width * height + h * width + w] = resized_image.at<cv::Vec3b>(h, w)[c];
}
}
}
}
template <typename T>
void matU8ToBlob2(const cv::Mat& orig_image, InferenceEngine::Blob::Ptr& blob, int batchIndex = 0) {
InferenceEngine::SizeVector blobSize = blob->getTensorDesc().getDims();
const size_t width = blobSize[3];
const size_t height = blobSize[2];
const size_t channels = blobSize[1];
InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
if (!mblob) {
IE_THROW() << "We expect blob to be inherited from MemoryBlob in matU8ToBlob, "
<< "but by fact we were not able to cast inputBlob to MemoryBlob";
}
// locked memory holder should be alive all time while access to its buffer happens
auto mblobHolder = mblob->wmap();
T* blob_data = mblobHolder.as<T*>();
cv::Mat resized_image(orig_image);
if (static_cast<int>(width) != orig_image.size().width || static_cast<int>(height) != orig_image.size().height) {
cv::resize(orig_image, resized_image, cv::Size(width, height));
}
int batchOffset = batchIndex * width * height * channels;
for (size_t c = 0; c < channels; c++) {
for (size_t h = 0; h < height; h++) {
for (size_t w = 0; w < width; w++) {
blob_data[batchOffset + c * width * height + h * width + w] = resized_image.at<cv::Vec3b>(h, w)[c]/255.0;
}
}
}
}
/**
* @brief Wraps data stored inside of a passed cv::Mat object by new Blob pointer.
* @note: No memory allocation is happened. The blob just points to already existing
* cv::Mat data.
* @param mat - given cv::Mat object with an image data.
* @return resulting Blob pointer.
*/
static UNUSED InferenceEngine::Blob::Ptr wrapMat2Blob(const cv::Mat& mat) {
size_t channels = mat.channels();
size_t height = mat.size().height;
size_t width = mat.size().width;
size_t strideH = mat.step.buf[0];
size_t strideW = mat.step.buf[1];
bool is_dense = strideW == channels && strideH == channels * width;
if (!is_dense)
IE_THROW() << "Doesn't support conversion from not dense cv::Mat";
InferenceEngine::TensorDesc tDesc(InferenceEngine::Precision::U8,
{1, channels, height, width},
InferenceEngine::Layout::NHWC);
return InferenceEngine::make_shared_blob<uint8_t>(tDesc, mat.data);
}
static UNUSED ov::Tensor wrapMat2Tensor(const cv::Mat& mat) {
const size_t channels = mat.channels();
const size_t height = mat.size().height;
const size_t width = mat.size().width;
const size_t strideH = mat.step.buf[0];
const size_t strideW = mat.step.buf[1];
const bool is_dense = strideW == channels && strideH == channels * width;
OPENVINO_ASSERT(is_dense, "Doesn't support conversion from not dense cv::Mat");
return ov::Tensor(ov::element::u8, ov::Shape{1, height, width, channels}, mat.data);
}
最终测试效果: