C++（libtorch）调用yolov5训练得到的权重对目标进行检测

最新推荐文章于 2024-06-07 01:53:59 发布

道禅_倩

最新推荐文章于 2024-06-07 01:53:59 发布

阅读量940

点赞数 2

文章标签： YOLO c++ python

本文链接：https://blog.csdn.net/qq_51539952/article/details/132022210

版权

1.环境的配置

（1）libtorch

下载地址（libtorch的版本要与yolov5中训练的torch和cuda版本匹配，下方链接为torch==2.0.1 cuda==11.8版本的libtorch）：

https://download.pytorch.org/libtorch/cu118/libtorch-win-shared-with-deps-2.0.1%2Bcu118.zip

下方地址为不同版本的libtorch：

(177条消息) Libtorch各类版本下载---持续更新_libtorch下载_枫呱呱的博客-CSDN博客

（2）opencv

下载地址：https://opencv.org/releases.html

2.yolov5代码的训练和转换

（1）使用yolov5对数据集进行训练得到pt

（2）.torchscript.pt版本模型导出（C++可以对该权重进行加载），通过如下python代码将yolov5训练得到的pt文件转换为torchscript.pt：

"""Exports a YOLOv5 *.pt model to TorchScript format

Usage:
    $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 32
"""

import argparse
import sys
import time

sys.path.append('./')  # to run '$ python *.py' files in subdirectories

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import set_logging, check_img_size

if __name__ == '__main__':
    import os  # local import: only used to derive the output file name

    # ---- Command-line options ------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='best.pt', help='weights path')  # from yolov5/models/
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    # NOTE: --dynamic is accepted for command-line compatibility but is not used by this script.
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
    parser.add_argument('--batch-size', type=int, default=32, help='batch size')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand a single value to [size, size]
    print(opt)
    set_logging()
    t = time.time()

    # ---- Load the PyTorch model on the GPU -----------------------------------
    model = attempt_load(opt.weights, device=torch.device('cuda'))  # load FP32 model

    # ---- Checks ---------------------------------------------------------------
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # ---- Dummy input used for the dry run and for tracing ---------------------
    # NOTE(review): the model is traced with `--batch-size` images; confirm that the
    # traced module also accepts the batch size used at inference time.
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device='cuda')
    model.eval()

    # ---- Update model ----------------------------------------------------------
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)

    # Explicitly keep the Detect() layer's `export` flag off.
    # NOTE(review): presumably this makes the traced model return decoded predictions
    # (the C++ side reads the first tuple element) -- confirm against models/yolo.py.
    model.model[-1].export = False
    model(img)  # dry run

    # ---- TorchScript export ----------------------------------------------------
    try:
        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
        # Replace only the file extension. str.replace('.pt', ...) would also rewrite
        # any other '.pt' inside the path and would leave the name unchanged (and so
        # overwrite the input weights) when the file does not end in '.pt'.
        weights_root, _ = os.path.splitext(opt.weights)
        f = weights_root + '.GPU_torchscript.pt'  # filename
        ts = torch.jit.trace(model, img)
        ts.save(f)
        print('TorchScript export success, saved as %s' % f)
    except Exception as e:
        print('TorchScript export failure: %s' % e)

    print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))

3.vs中环境配置

（1）opencv和libtorch在包含目录和库目录进行加载

点击项目属性，点击VC++目录

（2）依次选择链接器、输入，在附加依赖项中添加opencv和libtorch的lib文件夹中所有的.lib：

这里我写了个python代码，直接读取路径将文件夹中后缀名为lib的名称输出来：

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import copy
from lxml.etree import Element, SubElement, tostring, ElementTree

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))

# Directory whose .lib files should be listed. os.path.join() returns an absolute
# second argument unchanged; a relative one is resolved against this script's directory.
xml_path = os.path.join(CURRENT_DIR, 'D:/libtorch/lib/')


def list_lib_files(directory):
    """Return the names of the entries in `directory` whose extension is '.lib'.

    The extension is taken with os.path.splitext(), so names without a dot are
    skipped instead of raising IndexError, and names containing several dots
    (e.g. 'foo.1.lib') are classified by their real, final extension.
    The comparison ignores case ('.LIB' is accepted as well).
    """
    return [name for name in os.listdir(directory)
            if os.path.splitext(name)[1].lower() == '.lib']


# Print one library name per line, ready to paste into "Additional Dependencies".
for img_xml in list_lib_files(xml_path):
    print(img_xml)

4.C++调用模型进行推理检测

下面为进行检测的C++代码

代码支持对摄像头、单张图像以及文件夹图像进行检测，其中摄像头和单张图像的检测代码已注释掉；当前代码读取文件夹中的所有图像进行检测，并将检测结果以原图像名称保存：

wenjian.cpp

#pragma once
//max.cpp
#include"wenjian.h"//" "表示是我们自己定义的头文件
#include <stdio.h>
#include <time.h>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/ml.hpp>
#include <iostream>

#include<vector>
#include <fstream>
#include <io.h>

//#include <windows.h>

using namespace cv;
using namespace std;

//读入指定文件夹下的所有文件
void getFiles(string path, vector<string>& files, vector<string>& filenames)
{
   intptr_t hFile = 0;//intptr_t和uintptr_t是什么类型:typedef long int/ typedef unsigned long int
   struct _finddata_t fileinfo;
   string p;
   //assign方法可以理解为先将原字符串清空，然后赋予新的值作替换。
   if ((hFile = _findfirst(p.assign(path).append("\\*").c_str(), &fileinfo)) != -1)
   {
       do
       {
           if (strcmp(fileinfo.name, ".") != 0 && strcmp(fileinfo.name, "..") != 0)
               //这句有点不明白，如果不加，识别的文件里就有.和..两个文件，哪位大神可以给解释下？感激不尽！！！
           {
               files.push_back(p.assign(path).append("\\").append(fileinfo.name));
               filenames.push_back(fileinfo.name);
           }

       } while (_findnext(hFile, &fileinfo) == 0);
       _findclose(hFile);
   }
}

wenjian.h

#include<iostream>
#include<opencv2/opencv.hpp>
#include <stdio.h>
#include <time.h>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/ml.hpp>
#include <iostream>

#include<vector>
#include <fstream>
#include <io.h>
using namespace std;
using namespace cv;

// Reads all files in the given folder `path`; results are returned through
// the `files` and `filenames` output vectors.
void getFiles(string path, vector<string>& files, vector<string>& filenames);

yolo.cpp

#include <torch/script.h>
#include <torch/torch.h>

#include<opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui_c.h>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

#include <time.h>

#include <algorithm>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include"wenjian.h"
using namespace cv;
using namespace std;

// Loads class names from a text file, one name per line.
//   path - file to read
// Returns the lines in file order; the line number (0-based) is the class index,
// so blank lines are kept. Returns an empty vector and prints an error to stderr
// when the file cannot be opened.
std::vector<std::string> LoadNames(const std::string& path)
{
   std::vector<std::string> class_names;

   std::ifstream infile(path);
   if (!infile.is_open()) {
       std::cerr << "Error loading the class names!\n";
       return class_names;
   }

   std::string line;
   while (std::getline(infile, line)) {
       // A file with CRLF line endings leaves a trailing '\r' on every line when it
       // is not translated by the runtime; strip it so it never ends up in a label.
       if (!line.empty() && line.back() == '\r') {
           line.pop_back();
       }
       class_names.emplace_back(line);
   }

   return class_names;
}

// Resizes `src` so that it fits inside `out_size` with its aspect ratio preserved,
// then pads the result with a constant gray border (114, 114, 114) up to `out_size`.
//   src      - input image
//   dst      - receives the resized and padded image
//   out_size - target size
// Returns { left padding, top padding, scale factor } so that coordinates can be
// mapped back to the source image later.
std::vector<float> LetterboxImage(const cv::Mat& src, cv::Mat& dst, const cv::Size& out_size)
{
   // Read the source size first: it is needed after `dst` has been written.
   const float src_h = static_cast<float>(src.rows);
   const float src_w = static_cast<float>(src.cols);
   const float target_h = out_size.height;
   const float target_w = out_size.width;

   // The smaller ratio guarantees that both dimensions fit into the target.
   const float scale = std::min(target_w / src_w, target_h / src_h);

   const int resized_h = static_cast<int>(src_h * scale);
   const int resized_w = static_cast<int>(src_w * scale);
   cv::resize(src, dst, cv::Size(resized_w, resized_h));

   // Split the remaining space between both sides; an odd remainder gives the
   // extra pixel to the bottom / right border.
   const int missing_h = static_cast<int>(target_h) - resized_h;
   const int missing_w = static_cast<int>(target_w) - resized_w;
   const int top = missing_h / 2;
   const int down = (missing_h + 1) / 2;
   const int left = missing_w / 2;
   const int right = (missing_w + 1) / 2;

   cv::copyMakeBorder(dst, dst, top, down, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));

   return { static_cast<float>(left), static_cast<float>(top), scale };
}

// Column indices used to address the fields of one detection row
// (a row of 6 values: box corners, score, class index).
// Kept as an unscoped enum because the values are used directly as integer indices.
enum Det
{
   tl_x = 0,      // top-left corner, x
   tl_y = 1,      // top-left corner, y
   br_x = 2,      // bottom-right corner, x
   br_y = 3,      // bottom-right corner, y
   score = 4,     // detection score
   class_idx = 5  // class index
};

// One detected object.
struct Detection
{
   cv::Rect bbox;   // bounding box in pixel coordinates
   float score;     // detection score
   int class_idx;   // index of the detected class
};

// Copies tensor rows into the containers expected by OpenCV's NMS.
//   offset_boxes   - one box per row, columns indexed by Det::tl_x .. Det::br_y
//   det            - one detection per row; only column Det::score is read
//   offset_box_vec - receives one cv::Rect per row of `offset_boxes` (appended)
//   score_vec      - receives the matching score from `det` (appended)
// Row i of `det` must describe the same detection as row i of `offset_boxes`.
void Tensor2Detection(const at::TensorAccessor<float, 2>& offset_boxes,
   const at::TensorAccessor<float, 2>& det,
   std::vector<cv::Rect>& offset_box_vec,
   std::vector<float>& score_vec)
{
   const auto row_count = offset_boxes.size(0);

   for (int row = 0; row < row_count; ++row) {
       const auto box = offset_boxes[row];

       // cv::Point stores integers, so the float coordinates are truncated here.
       const cv::Point top_left(box[Det::tl_x], box[Det::tl_y]);
       const cv::Point bottom_right(box[Det::br_x], box[Det::br_y]);

       offset_box_vec.emplace_back(top_left, bottom_right);
       score_vec.push_back(det[row][Det::score]);
   }
}

// Maps detection boxes from letterboxed network-input coordinates back to the
// coordinates of the original image, in place.
//   data      - detections whose `bbox` is rewritten
//   pad_w     - horizontal padding that was added on the left of the image
//   pad_h     - vertical padding that was added on top of the image
//   scale     - resize factor that was applied to the original image
//   img_shape - size of the original image; boxes are clipped to
//               [0, width] x [0, height]
void ScaleCoordinates(std::vector<Detection>& data, float pad_w, float pad_h,
   float scale, const cv::Size& img_shape)
{
   auto clip = [](float n, float lower, float upper)
   {
       return std::max(lower, std::min(n, upper));
   };

   const float max_x = static_cast<float>(img_shape.width);
   const float max_y = static_cast<float>(img_shape.height);

   for (auto& detection : data) {
       // Undo the padding first, then the scaling, and keep the result inside the image.
       const float x1 = clip((detection.bbox.tl().x - pad_w) / scale, 0.0f, max_x);
       const float y1 = clip((detection.bbox.tl().y - pad_h) / scale, 0.0f, max_y);
       const float x2 = clip((detection.bbox.br().x - pad_w) / scale, 0.0f, max_x);
       const float y2 = clip((detection.bbox.br().y - pad_h) / scale, 0.0f, max_y);

       // cv::Point stores integers: the coordinates are truncated, as before.
       detection.bbox = cv::Rect(cv::Point(static_cast<int>(x1), static_cast<int>(y1)),
           cv::Point(static_cast<int>(x2), static_cast<int>(y2)));
   }
}

// Converts boxes from (center x, center y, width, height) to
// (x1, y1, x2, y2) = (top-left x/y, bottom-right x/y).
//   x - 2-D tensor with one box per row; columns 0..3 hold cx, cy, w, h
// Returns a new tensor shaped like `x`; columns Det::tl_x .. Det::br_y hold the
// corners, any further columns stay zero.
torch::Tensor xywh2xyxy(const torch::Tensor& x)
{
   const auto center_x = x.select(1, 0);
   const auto center_y = x.select(1, 1);
   const auto half_width = x.select(1, 2).div(2);
   const auto half_height = x.select(1, 3).div(2);

   auto corners = torch::zeros_like(x);
   // select() returns a view, so copy_() writes into the matching column of `corners`.
   corners.select(1, Det::tl_x).copy_(center_x - half_width);
   corners.select(1, Det::tl_y).copy_(center_y - half_height);
   corners.select(1, Det::br_x).copy_(center_x + half_width);
   corners.select(1, Det::br_y).copy_(center_y + half_height);
   return corners;
}

// Turns the raw network output into final detections for every image of the batch.
//   detections - tensor of shape [batch, candidates, 5 + num_classes]; each candidate
//                holds center x, center y, width, height, object confidence and
//                one score per class
//   pad_w, pad_h, scale - letterbox parameters, forwarded to ScaleCoordinates()
//   img_shape  - size of the original image, forwarded to ScaleCoordinates()
//   conf_thres - minimum object confidence, also used as NMS score threshold
//   iou_thres  - IoU threshold for non-maximum suppression
// Returns one vector of detections per image, in batch order. An image without
// any detection yields an empty vector, so output[i] always belongs to image i.
std::vector<std::vector<Detection>> PostProcessing(const torch::Tensor& detections,
   float pad_w, float pad_h, float scale, const cv::Size& img_shape,
   float conf_thres, float iou_thres)
{
   // leading attributes of a candidate: center x, center y, width, height, object confidence
   constexpr int item_attr_size = 5;
   int batch_size = detections.size(0);
   // number of classes, e.g. 80 for coco dataset
   auto num_classes = detections.size(2) - item_attr_size;

   // get candidates which object confidence > threshold
   auto conf_mask = detections.select(2, 4).ge(conf_thres).unsqueeze(2);

   std::vector<std::vector<Detection>> output;
   output.reserve(batch_size);

   // iterating all images in the batch
   for (int batch_i = 0; batch_i < batch_size; batch_i++) {
       // apply constrains to get filtered detections for current image
       auto det = torch::masked_select(detections[batch_i], conf_mask[batch_i]).view({ -1, num_classes + item_attr_size });

       // nothing left for this image: store an empty result so that the output
       // index keeps matching the batch index, then process the next image
       if (0 == det.size(0)) {
           output.emplace_back();
           continue;
       }

       // compute overall score = obj_conf * cls_conf, similar to x[:, 5:] *= x[:, 4:5]
       det.slice(1, item_attr_size, item_attr_size + num_classes) *= det.select(1, 4).unsqueeze(1);

       // box (center x, center y, width, height) to (x1, y1, x2, y2)
       torch::Tensor box = xywh2xyxy(det.slice(1, 0, 4));

       // [best class only] get the max classes score at each result (e.g. elements 5-84)
       std::tuple<torch::Tensor, torch::Tensor> max_classes = torch::max(det.slice(1, item_attr_size, item_attr_size + num_classes), 1);

       // class score
       auto max_conf_score = std::get<0>(max_classes);
       // index
       auto max_conf_index = std::get<1>(max_classes);

       max_conf_score = max_conf_score.to(torch::kFloat).unsqueeze(1);
       max_conf_index = max_conf_index.to(torch::kFloat).unsqueeze(1);

       // shape: n * 6, top-left x/y (0,1), bottom-right x/y (2,3), score(4), class index(5)
       det = torch::cat({ box.slice(1, 0, 4), max_conf_score, max_conf_index }, 1);

       // for batched NMS: shift every box by (class index * max_wh) so that boxes of
       // different classes cannot overlap and one NMS call handles all classes.
       // `det` now has 6 columns, so column item_attr_size (5) is the class index.
       constexpr int max_wh = 4096;
       auto c = det.slice(1, item_attr_size, item_attr_size + 1) * max_wh;
       auto offset_box = det.slice(1, 0, 4) + c;

       std::vector<cv::Rect> offset_box_vec;
       std::vector<float> score_vec;

       // copy data back to cpu
       auto offset_boxes_cpu = offset_box.cpu();
       auto det_cpu = det.cpu();
       const auto& det_cpu_array = det_cpu.accessor<float, 2>();

       // use accessor to access tensor elements efficiently
       Tensor2Detection(offset_boxes_cpu.accessor<float, 2>(), det_cpu_array, offset_box_vec, score_vec);

       // run NMS on the class-shifted boxes
       std::vector<int> nms_indices;
       cv::dnn::NMSBoxes(offset_box_vec, score_vec, conf_thres, iou_thres, nms_indices);

       // build the result from the unshifted boxes of the surviving rows
       std::vector<Detection> det_vec;
       for (int index : nms_indices) {
           Detection t;
           const auto& b = det_cpu_array[index];
           t.bbox =
               cv::Rect(cv::Point(b[Det::tl_x], b[Det::tl_y]),
                   cv::Point(b[Det::br_x], b[Det::br_y]));
           t.score = det_cpu_array[index][Det::score];
           t.class_idx = det_cpu_array[index][Det::class_idx];
           det_vec.emplace_back(t);
       }

       // map the boxes back to the coordinates of the original image
       ScaleCoordinates(det_vec, pad_w, pad_h, scale, img_shape);

       // save final detection for the current image
       output.emplace_back(det_vec);
   } // end of batch iterating

   return output;
}

// Draws the detections of the first image of the batch onto `img`.
//   img         - image to draw on (modified in place)
//   detections  - per-image detections; only detections[0] is drawn
//   class_names - class labels, indexed by Detection::class_idx
//   label       - when true, a "<class name> <score>" caption is drawn above each box
// Returns `img` (a cv::Mat header sharing the same pixel data).
// A class index outside `class_names` is labelled with its number instead of a name.
cv::Mat Demo(cv::Mat& img,
   const std::vector<std::vector<Detection>>& detections,
   const std::vector<std::string>& class_names,
   bool label = true)
{
   if (!detections.empty()) {
       for (const auto& detection : detections[0]) {
           const auto& box = detection.bbox;
           float score = detection.score;
           int class_idx = detection.class_idx;

           cv::rectangle(img, box, cv::Scalar(0, 0, 255), 2);

           if (label) {
               std::stringstream ss;
               ss << std::fixed << std::setprecision(2) << score;

               // The names file may list fewer classes than the model predicts;
               // never index past its end.
               const bool known_class = class_idx >= 0
                   && static_cast<size_t>(class_idx) < class_names.size();
               const std::string name = known_class ? class_names[class_idx] : std::to_string(class_idx);
               std::string s = name + " " + ss.str();

               auto font_face = cv::FONT_HERSHEY_DUPLEX;
               auto font_scale = 1.0;
               int thickness = 1;
               int baseline = 0;
               auto s_size = cv::getTextSize(s, font_face, font_scale, thickness, &baseline);
               // filled background for the caption, placed directly above the box
               cv::rectangle(img,
                   cv::Point(box.tl().x, box.tl().y - s_size.height - 5),
                   cv::Point(box.tl().x + s_size.width, box.tl().y),
                   cv::Scalar(0, 0, 255), -1);
               cv::putText(img, s, cv::Point(box.tl().x, box.tl().y - 5),
                   font_face, font_scale, cv::Scalar(255, 255, 255), thickness);
           }
       }
   }
   //cv::namedWindow("Result", cv::WINDOW_NORMAL);
   //cv::imshow("Result", img);
   return img;
}

int main()
{
//cout << "cuda是否可用：" << torch::cuda::is_available() << endl;
//cout << "cudnn是否可用：" << torch::cuda::cudnn_is_available() << endl;

   torch::DeviceType device_type;
   device_type = torch::kCUDA;
   torch::Device device(device_type);
   torch::jit::script::Module module;
   module = torch::jit::load("best.GPU_torchscript.pt", device); //加载模型
   module.eval();

   std::vector<std::string> class_names = LoadNames("coco.names");//读取标签
   if (class_names.empty()) {
       return -1;
   }
   // set up threshold
   float conf_thres = 0.4;
   float iou_thres = 0.5;

   //视频检测
   //VideoCapture video1(0);//打开笔记本自带摄像头（1）为外接摄像头
   //video1.set(CAP_PROP_FRAME_WIDTH, 1280);
   //video1.set(CAP_PROP_FRAME_HEIGHT, 720);
   读取视频帧率
   //double rate = video1.get(CAP_PROP_FPS);
   //std::cout << "rate: " << rate << std::endl;
   当前视频帧
   //Mat frame;
   每一帧之间的延时
   //int delay = 1000 / rate;
   //bool stop(false);
   //while (!stop)
   //{
   //   double t = (double)cv::getTickCount();//开始计时

   //   if (!video1.read(frame))
   //   {
   //       std::cout << "no video frame" << std::endl;
   //       break;
   //   }

   //文件夹进行检测
   char windowname[10];
   string filePath = "D:/Deep learning/yolov5-master/images/"; //源文件夹
   string dst_filePath = "images/"; //目标文件夹

vector<string> files;
vector<string> filenames;

getFiles(filePath, files, filenames);

   int number = files.size();//文件数量
   for (int i = 0; i < number; i++)
   {
       //clock_t t_start = clock();
       string saveFilename = dst_filePath + filenames[i]; //写入部分
       Mat img = imread(files[i]);
       clock_t t_start = clock();

       //cv::Mat img = imread("right_outside_005458.bmp");
       //inference
       torch::NoGradGuard no_grad;
       cv::Mat img_input = img.clone();
       std::vector<float> pad_info = LetterboxImage(img_input, img_input, cv::Size(640, 640));
       const float pad_w = pad_info[0];
       const float pad_h = pad_info[1];
       const float scale = pad_info[2];
       cv::cvtColor(img_input, img_input, cv::COLOR_BGR2RGB); // BGR -> RGB
       //归一化需要是浮点类型
       img_input.convertTo(img_input, CV_32FC3, 1.0f / 255.0f); // normalization 1/255
       // 加载图像到设备
       auto tensor_img = torch::from_blob(img_input.data, { 1, img_input.rows, img_input.cols, img_input.channels() }).to(device_type);
       // BHWC -> BCHW
       tensor_img = tensor_img.permute({ 0, 3, 1, 2 }).contiguous(); // BHWC -> BCHW (Batch, Channel, Height, Width)
       std::vector<torch::jit::IValue> inputs;
       // 在容器尾部添加一个元素，这个元素原地构造，不需要触发拷贝构造和转移构造
       inputs.emplace_back(tensor_img);
       //start = clock();
       torch::jit::IValue output = module.forward(inputs);

       // 解析结果
       auto detections = output.toTuple()->elements()[0].toTensor();
       auto result = PostProcessing(detections, pad_w, pad_h, scale, img.size(), conf_thres, iou_thres);

       cv::Mat pre_img = Demo(img, result, class_names);
       //imwrite("结构图.jpg",pre_img);
       clock_t t_stop = clock();
       double endtime = (double)(t_stop - t_start) / CLOCKS_PER_SEC;
       imwrite(saveFilename, pre_img);
   }

//单张图像检测
//clock_t t_start = clock();

   //   cv::Mat img = imread("right_outside_005458.bmp");
   //   //inference
   //   torch::NoGradGuard no_grad;
   //   cv::Mat img_input = img.clone();
   //   std::vector<float> pad_info = LetterboxImage(img_input, img_input, cv::Size(640, 640));
   //   const float pad_w = pad_info[0];
   //   const float pad_h = pad_info[1];
   //   const float scale = pad_info[2];
   //   cv::cvtColor(img_input, img_input, cv::COLOR_BGR2RGB); // BGR -> RGB
   //   //归一化需要是浮点类型
   //   img_input.convertTo(img_input, CV_32FC3, 1.0f / 255.0f); // normalization 1/255
   //   // 加载图像到设备
   //   auto tensor_img = torch::from_blob(img_input.data, { 1, img_input.rows, img_input.cols, img_input.channels() }).to(device_type);
   //   // BHWC -> BCHW
   //   tensor_img = tensor_img.permute({ 0, 3, 1, 2 }).contiguous(); // BHWC -> BCHW (Batch, Channel, Height, Width)
   //   std::vector<torch::jit::IValue> inputs;
   //   // 在容器尾部添加一个元素，这个元素原地构造，不需要触发拷贝构造和转移构造
   //   inputs.emplace_back(tensor_img);
   //   //start = clock();
   //   torch::jit::IValue output = module.forward(inputs);
   //
   //   // 解析结果
   //   auto detections = output.toTuple()->elements()[0].toTensor();
   //   auto result = PostProcessing(detections, pad_w, pad_h, scale, img.size(), conf_thres, iou_thres);
   //

   //   cv::Mat pre_img = Demo(img, result, class_names);
   //   imwrite("结构图.jpg",pre_img);
   //   clock_t t_stop = clock();
   //   double endtime = (double)(t_stop - t_start) / CLOCKS_PER_SEC;

       //cout << "检测时间：" << endtime << endl;
       //t = ((double)cv::getTickCount() - t) / cv::getTickFrequency();//结束计时
       //int fps = int(1.0 / t);//转换为帧率

//std::cout << "FPS: " << fps << std::endl;//输出帧率

       //putText(pre_img, ("FPS: " + std::to_string(fps)), Point(0, 50), FONT_HERSHEY_COMPLEX, 0.5, Scalar(0, 0, 0));//输入到帧frame上
       //cv::namedWindow("Result", cv::WINDOW_NORMAL);
       //cv::namedWindow("Result", cv::WINDOW_AUTOSIZE);
       //cv::imshow("Result", pre_img);

       //waitKey()函数的作用是刷新imshow()展示的图片
       //if (waitKey(10) == 27)//27是键盘摁下esc时，计算机接收到的ascii码值
       //{
       //   break;
       //}
   //}
   //video1.release();
   system("pause");
   return 0;
}
检测效果：