YOLOv5 + LibTorch 1.9 + VS2017: C++ Model Deployment

I. Model conversion: to call a PyTorch model from C++, the model must first be converted to TorchScript.

1. Script to convert a .pt model to TorchScript

import argparse
import sys
import time

sys.path.append('./')  # to run '$ python *.py' files in subdirectories

import torch
import torch.nn as nn

import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import set_logging, check_img_size
from utils.torch_utils import select_device

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')  # from yolov5/models/
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    y = model(img)  # dry run

    # TorchScript export
    try:
        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img)
        ts.save(f)
        print('TorchScript export success, saved as %s' % f)
    except Exception as e:
        print('TorchScript export failure: %s' % e)

    # ONNX export
    try:
        import onnx

        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
        f = opt.weights.replace('.pt', '.onnx')  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=['classes', 'boxes'] if y is None else ['output'],
                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        onnx.checker.check_model(onnx_model)  # check onnx model
        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
        print('ONNX export success, saved as %s' % f)
    except Exception as e:
        print('ONNX export failure: %s' % e)

    # CoreML export
    try:
        import coremltools as ct

        print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
        # convert model from torchscript and apply pixel scaling as per detect.py
        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        model.save(f)
        print('CoreML export success, saved as %s' % f)
    except Exception as e:
        print('CoreML export failure: %s' % e)

    # Finish
    print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))

2. Conversion command

python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1

Note: in the script above, the Detect() layer only assembles the final [batch, boxes, 5 + classes] prediction tensor when --grid is passed (model.model[-1].export = not opt.grid). The C++ code in section II reads that assembled tensor from the first element of the output tuple, so add --grid to the command if your export otherwise yields raw feature maps.

II. Configuring LibTorch in VS 2017

1. Download LibTorch: libtorch-win-shared-with-deps-1.9.0+cpu.zip
On the download page, Compute Platform offers CUDA 10.2, CUDA 11.1, and CPU; this article deploys on CPU. If you need an older build such as CUDA 9, it can be obtained from the LibTorch release history.
2. Open Visual Studio 2017 and create a new project.

3. Project -> Properties -> C/C++ -> General -> Additional Include Directories: add the LibTorch header path.
4. Project -> Properties -> Linker -> General -> Additional Library Directories: add the LibTorch library path.
5. Project -> Properties -> Linker -> Input -> Additional Dependencies: add the following library names (a quick smoke test to verify this setup follows the list below).

c10.lib
kineto.lib
torch.lib
torch_cpu.lib
opencv_world452.lib
kernel32.lib
user32.lib
gdi32.lib
winspool.lib
shell32.lib
ole32.lib
oleaut32.lib
uuid.lib
comdlg32.lib
advapi32.lib
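
With the paths and dependencies above in place, it is worth confirming that the project actually compiles and links before building the full detector. A minimal smoke test, assuming only the configuration from steps 3-5 (nothing here is specific to YOLOv5):

#include <opencv2/opencv.hpp>
#include <torch/torch.h>
#include <iostream>

int main()
{
	// If this compiles, links, and runs, the include/library setup is correct.
	std::cout << "CUDA available: " << torch::cuda::is_available() << std::endl;
	std::cout << "OpenCV version: " << CV_VERSION << std::endl;

	torch::Tensor t = torch::rand({ 2, 3 }); // exercise a real LibTorch op
	std::cout << t << std::endl;
	return 0;
}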

6. The complete inference code:

#include <opencv2/opencv.hpp>
#include <torch/script.h>
#include <torch/torch.h>
#include <algorithm>
#include <iostream>
#include <time.h>
#include <memory>

std::vector<torch::Tensor> non_max_suppression(torch::Tensor preds, float score_thresh = 0.5, float iou_thresh = 0.35)
{
	std::vector<torch::Tensor> output;
	for (size_t i = 0; i < preds.sizes()[0]; ++i)
	{
		torch::Tensor pred = preds.select(0, i);

		// When inference runs on the GPU, the predictions are CUDA tensors; move
		// them to the CPU before NMS, otherwise the element access below will throw.
		pred = pred.to(at::kCPU);

		// Filter by scores
		torch::Tensor scores = pred.select(1, 4) * std::get<0>(torch::max(pred.slice(1, 5, pred.sizes()[1]), 1));
		pred = torch::index_select(pred, 0, torch::nonzero(scores > score_thresh).select(1, 0));
		if (pred.sizes()[0] == 0) continue;

		// (center_x, center_y, w, h) to (left, top, right, bottom)
		pred.select(1, 0) = pred.select(1, 0) - pred.select(1, 2) / 2;
		pred.select(1, 1) = pred.select(1, 1) - pred.select(1, 3) / 2;
		pred.select(1, 2) = pred.select(1, 0) + pred.select(1, 2);
		pred.select(1, 3) = pred.select(1, 1) + pred.select(1, 3);

		// Computing scores and classes
		std::tuple<torch::Tensor, torch::Tensor> max_tuple = torch::max(pred.slice(1, 5, pred.sizes()[1]), 1);
		pred.select(1, 4) = pred.select(1, 4) * std::get<0>(max_tuple);
		pred.select(1, 5) = std::get<1>(max_tuple);

		torch::Tensor  dets = pred.slice(1, 0, 6);

		torch::Tensor keep = torch::empty({ dets.sizes()[0] });
		torch::Tensor areas = (dets.select(1, 3) - dets.select(1, 1)) * (dets.select(1, 2) - dets.select(1, 0));
		std::tuple<torch::Tensor, torch::Tensor> indexes_tuple = torch::sort(dets.select(1, 4), 0, 1);
		torch::Tensor v = std::get<0>(indexes_tuple);
		torch::Tensor indexes = std::get<1>(indexes_tuple);
		int count = 0;
		while (indexes.sizes()[0] > 0)
		{
			keep[count] = (indexes[0].item().toInt());
			count += 1;

			// Computing overlaps
			torch::Tensor lefts = torch::empty(indexes.sizes()[0] - 1);
			torch::Tensor tops = torch::empty(indexes.sizes()[0] - 1);
			torch::Tensor rights = torch::empty(indexes.sizes()[0] - 1);
			torch::Tensor bottoms = torch::empty(indexes.sizes()[0] - 1);
			torch::Tensor widths = torch::empty(indexes.sizes()[0] - 1);
			torch::Tensor heights = torch::empty(indexes.sizes()[0] - 1);
			for (size_t i = 0; i < indexes.sizes()[0] - 1; ++i)
			{
				lefts[i] = std::max(dets[indexes[0]][0].item().toFloat(), dets[indexes[i + 1]][0].item().toFloat());
				tops[i] = std::max(dets[indexes[0]][1].item().toFloat(), dets[indexes[i + 1]][1].item().toFloat());
				rights[i] = std::min(dets[indexes[0]][2].item().toFloat(), dets[indexes[i + 1]][2].item().toFloat());
				bottoms[i] = std::min(dets[indexes[0]][3].item().toFloat(), dets[indexes[i + 1]][3].item().toFloat());
				widths[i] = std::max(float(0), rights[i].item().toFloat() - lefts[i].item().toFloat());
				heights[i] = std::max(float(0), bottoms[i].item().toFloat() - tops[i].item().toFloat());
			}
			torch::Tensor overlaps = widths * heights;

			// Filter by IoU
			torch::Tensor ious = overlaps / (areas.select(0, indexes[0].item().toInt()) + torch::index_select(areas, 0, indexes.slice(0, 1, indexes.sizes()[0])) - overlaps);
			indexes = torch::index_select(indexes, 0, torch::nonzero(ious <= iou_thresh).select(1, 0) + 1);
		}
		keep = keep.toType(torch::kInt64);
		output.push_back(torch::index_select(dets, 0, keep.slice(0, 0, count)));
	}
	return output;
}
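
The function above expects preds of shape [batch, num_boxes, 5 + num_classes], with (center_x, center_y, w, h, objectness) in the first five columns. Below is a minimal sketch of calling it on dummy data; the shapes and thresholds are illustrative only, and on random inputs the boxes are meaningless, so this merely exercises the types and shapes:

#include <torch/torch.h>
#include <iostream>
#include <vector>
// non_max_suppression() as defined above.

int main()
{
	// Dummy predictions: 1 image, 100 candidate boxes, 4 classes -> 9 columns.
	torch::Tensor preds = torch::rand({ 1, 100, 9 });
	std::vector<torch::Tensor> dets = non_max_suppression(preds, 0.5f, 0.35f);
	if (!dets.empty())
		std::cout << dets[0] << std::endl; // rows: (left, top, right, bottom, score, class_id)
	return 0;
}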

torch::Tensor unet_data_preprocess(cv::Mat &image) {
	cv::cvtColor(image, image, cv::COLOR_BGR2RGB);

	//int w = image.cols;
	//int h = image.rows;
	int newW = 640;
	int newH = 640;

	cv::Mat img_processed;
	cv::resize(image, img_processed, cv::Size(newW, newH));

	//cv::imshow("img_processed", img_processed);
	//cv::waitKey(0);

	torch::Tensor imgtransform;
	imgtransform = torch::from_blob(img_processed.data, { 1,newH,newW,3 }, torch::kByte);
	imgtransform = imgtransform.permute({ 0,3,1,2 });
	imgtransform = imgtransform.to(torch::kFloat);
	imgtransform = imgtransform.div(255.0);

	return imgtransform;
}
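
Note that the plain cv::resize above stretches the input to 640x640 rather than preserving its aspect ratio; this works because main() below rescales x and y back independently, but it differs from the letterbox preprocessing in YOLOv5's own detect.py. If you want to match the Python pipeline more closely, here is a sketch of a letterbox step (letterbox_resize is our own helper name; the gray value 114 follows the YOLOv5 convention; mapping boxes back would additionally need the scale and pad offsets):

#include <opencv2/opencv.hpp>
#include <algorithm>
#include <cmath>

// Sketch: resize preserving aspect ratio, then pad to newW x newH with gray.
cv::Mat letterbox_resize(const cv::Mat &src, int newW = 640, int newH = 640)
{
	float r = std::min(newW / (float)src.cols, newH / (float)src.rows); // uniform scale
	int unpadW = (int)std::round(src.cols * r);
	int unpadH = (int)std::round(src.rows * r);
	cv::Mat resized, out;
	cv::resize(src, resized, cv::Size(unpadW, unpadH));
	int dw = newW - unpadW, dh = newH - unpadH; // total padding in each dimension
	cv::copyMakeBorder(resized, out, dh / 2, dh - dh / 2, dw / 2, dw - dw / 2,
		cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
	return out;
}
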
int main(int argc, char* argv[])
{
	std::cout << "cuda::is_available():" << torch::cuda::is_available() << std::endl;
	torch::DeviceType device_type = at::kCPU; // 定义设备类型
	if (torch::cuda::is_available())
		device_type = at::kCUDA;
	// Loading  Module
	torch::jit::script::Module module = torch::jit::load("best.torchscript.pt");//best.torchscript3.pt //模型路径
	module.to(device_type); // 模型加载至CPU
	module.eval();   //表示只需要推理face

	std::vector<std::string> classnames = {"face","bike","tree"};  // names of the detection classes

	cv::String path = "image path"; // folder or glob pattern for the test images
	std::vector<cv::String> m_vImgPath;
	cv::glob(path, m_vImgPath);

	for (int j = 0; j < m_vImgPath.size(); ++j)
	{
		clock_t start = clock();

		// Preparing input tensor
		cv::Mat src = cv::imread(m_vImgPath[j]);
		if (src.empty())
		{
			std::cout << "Read frame failed!" << std::endl;
			return -1;
		}
		clock_t readTime = clock();
		std::cout << "ReadImgTime: " << std::to_string(readTime - start) << "ms  ";
	
		torch::Tensor imgTensor = unet_data_preprocess(src);

		std::vector<torch::jit::IValue> inputs;
		inputs.emplace_back(imgTensor);
		clock_t TransTime = clock();
		std::cout << "TransImgTime: " << std::to_string(TransTime-readTime ) << "ms  ";
		// preds: [?, 15120, 9]
		torch::jit::IValue output = module.forward(inputs);
	
		auto preds = output.toTuple()->elements()[0].toTensor();
		
		clock_t DealTime = clock();
		std::cout << "DealImgTime: " << std::to_string(DealTime-TransTime) << "ms  ";

		std::vector<torch::Tensor> dets = non_max_suppression(preds, 0.5, 0.5);



		if (dets.size() > 0)
		{
			// Visualize result
			for (size_t i = 0; i < dets[0].sizes()[0]; ++i)
			{
				float left = dets[0][i][0].item().toFloat() * src.cols / 640;
				float top = dets[0][i][1].item().toFloat() * src.rows / 640;
				float right = dets[0][i][2].item().toFloat() * src.cols / 640;
				float bottom = dets[0][i][3].item().toFloat() * src.rows / 640;
				float score = dets[0][i][4].item().toFloat();
				int classID = dets[0][i][5].item().toInt();

				cv::rectangle(src, cv::Rect(left, top, (right - left), (bottom - top)), cv::Scalar(0, 255, 0), 2);

				cv::putText(src,
					classnames[classID] + ": " + cv::format("%.2f", score),
					cv::Point(left, top),
					cv::FONT_HERSHEY_SIMPLEX, (right - left) / 200, cv::Scalar(0, 255, 0), 2);
			}
		}
		clock_t postProcessTime = clock();
		std::cout << "postProcessTime: " << std::to_string(postProcessTime-DealTime ) <<"ms  ";
		clock_t end = clock();
		double time = end - start;
		std::cout << "-[INFO] Frame:" << std::to_string(time) << std::endl;
	}

	return 0;
}

7. Results
