YOLOv5 + LibTorch 1.9 + VS2017: Deploying the Model in C++
I. Model conversion: to call a PyTorch model from C++, it must first be converted to TorchScript.
1. Script for converting a .pt model to a TorchScript model
import argparse
import sys
import time
sys.path.append('./') # to run '$ python *.py' files in subdirectories
import torch
import torch.nn as nn
import models
from models.experimental import attempt_load
from utils.activations import Hardswish, SiLU
from utils.general import set_logging, check_img_size
from utils.torch_utils import select_device
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')  # from yolov5/models/
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
    parser.add_argument('--grid', action='store_true', help='export Detect() layer grid')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)
    model = attempt_load(opt.weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.grid  # set Detect() layer grid export
    y = model(img)  # dry run

    # TorchScript export
    try:
        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
        ts = torch.jit.trace(model, img)
        ts.save(f)
        print('TorchScript export success, saved as %s' % f)
    except Exception as e:
        print('TorchScript export failure: %s' % e)

    # ONNX export
    try:
        import onnx

        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
        f = opt.weights.replace('.pt', '.onnx')  # filename
        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                          output_names=['classes', 'boxes'] if y is None else ['output'],
                          dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                                        'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)

        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        onnx.checker.check_model(onnx_model)  # check onnx model
        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
        print('ONNX export success, saved as %s' % f)
    except Exception as e:
        print('ONNX export failure: %s' % e)

    # CoreML export
    try:
        import coremltools as ct

        print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
        # convert model from torchscript and apply pixel scaling as per detect.py
        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
        f = opt.weights.replace('.pt', '.mlmodel')  # filename
        model.save(f)
        print('CoreML export success, saved as %s' % f)
    except Exception as e:
        print('CoreML export failure: %s' % e)

    # Finish
    print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))
2. Conversion command
python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
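Once the VS project in part II is set up, it can be worth confirming that the exported file actually loads and runs under LibTorch before wiring it into a full application. The following is only a minimal sketch (the weights file name is whatever your export produced, here assumed to be yolov5s.torchscript.pt): it loads the TorchScript module and feeds it a zero tensor at the export resolution.
#include <torch/script.h>
#include <iostream>

int main()
{
    try
    {
        // Load the TorchScript module produced by models/export.py (file name assumed).
        torch::jit::script::Module module = torch::jit::load("yolov5s.torchscript.pt");
        module.eval();

        // Dummy 1x3x640x640 input matching the export resolution.
        torch::Tensor dummy = torch::zeros({ 1, 3, 640, 640 });
        torch::jit::IValue out = module.forward({ dummy });
        std::cout << "Forward pass succeeded; output is "
                  << (out.isTuple() ? "a tuple" : "a tensor/other IValue") << std::endl;
    }
    catch (const c10::Error& e)
    {
        std::cerr << "Failed to load/run model: " << e.what() << std::endl;
        return -1;
    }
    return 0;
}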
II. Configuring LibTorch in VS 2017
1. Download LibTorch: libtorch-win-shared-with-deps-1.9.0+cpu.zip
Under Compute Platform you can choose CUDA 10.2, CUDA 11.1, or CPU; this article deploys on CPU. If you need an older version such as CUDA 9, it can be obtained from the LibTorch release history.
2. Open Visual Studio 2017 and create a new project.
3. Project -> Properties -> C/C++ -> General -> Additional Include Directories: add the LibTorch header paths.
4. Project -> Properties -> Linker -> General -> Additional Library Directories: add the LibTorch library path.
5. Project -> Properties -> Linker -> Input -> Additional Dependencies: add the following .lib names (alternatively, see the #pragma sketch after the list).
c10.lib
kineto.lib
torch.lib
torch_cpu.lib
opencv_world452.lib
kernel32.lib
user32.lib
gdi32.lib
winspool.lib
shell32.lib
ole32.lib
oleaut32.lib
uuid.lib
comdlg32.lib
advapi32.lib
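As an alternative to typing the names into the Additional Dependencies dialog, MSVC also lets you request the same import libraries from source with #pragma comment(lib, ...) directives; a sketch, assuming the library directories from step 4 are already configured:
// MSVC-only alternative to step 5: link the LibTorch/OpenCV import libraries from source.
// The Win32 system libraries (kernel32.lib, user32.lib, ...) are normally linked by
// default in a VS project, so they are omitted here.
#pragma comment(lib, "c10.lib")
#pragma comment(lib, "kineto.lib")
#pragma comment(lib, "torch.lib")
#pragma comment(lib, "torch_cpu.lib")
#pragma comment(lib, "opencv_world452.lib")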
6. The inference code is as follows:
#include <opencv2/opencv.hpp>
#include <torch/script.h>
#include <torch/torch.h>
#include <algorithm>
#include <iostream>
#include <time.h>
#include <memory>
std::vector<torch::Tensor> non_max_suppression(torch::Tensor preds, float score_thresh = 0.5, float iou_thresh = 0.35)
{
    std::vector<torch::Tensor> output;
    for (size_t i = 0; i < preds.sizes()[0]; ++i)
    {
        torch::Tensor pred = preds.select(0, i);

        // GPU inference produces CUDA tensors; move them to the CPU before NMS,
        // otherwise the post-processing below will throw.
        pred = pred.to(at::kCPU);

        // Filter by scores
        torch::Tensor scores = pred.select(1, 4) * std::get<0>(torch::max(pred.slice(1, 5, pred.sizes()[1]), 1));
        pred = torch::index_select(pred, 0, torch::nonzero(scores > score_thresh).select(1, 0));
        if (pred.sizes()[0] == 0) continue;

        // (center_x, center_y, w, h) to (left, top, right, bottom)
        pred.select(1, 0) = pred.select(1, 0) - pred.select(1, 2) / 2;
        pred.select(1, 1) = pred.select(1, 1) - pred.select(1, 3) / 2;
        pred.select(1, 2) = pred.select(1, 0) + pred.select(1, 2);
        pred.select(1, 3) = pred.select(1, 1) + pred.select(1, 3);

        // Computing scores and classes
        std::tuple<torch::Tensor, torch::Tensor> max_tuple = torch::max(pred.slice(1, 5, pred.sizes()[1]), 1);
        pred.select(1, 4) = pred.select(1, 4) * std::get<0>(max_tuple);
        pred.select(1, 5) = std::get<1>(max_tuple);

        torch::Tensor dets = pred.slice(1, 0, 6);
        torch::Tensor keep = torch::empty({ dets.sizes()[0] });
        torch::Tensor areas = (dets.select(1, 3) - dets.select(1, 1)) * (dets.select(1, 2) - dets.select(1, 0));
        std::tuple<torch::Tensor, torch::Tensor> indexes_tuple = torch::sort(dets.select(1, 4), 0, 1);
        torch::Tensor v = std::get<0>(indexes_tuple);
        torch::Tensor indexes = std::get<1>(indexes_tuple);
        int count = 0;
        while (indexes.sizes()[0] > 0)
        {
            keep[count] = (indexes[0].item().toInt());
            count += 1;

            // Computing overlaps
            torch::Tensor lefts = torch::empty(indexes.sizes()[0] - 1);
            torch::Tensor tops = torch::empty(indexes.sizes()[0] - 1);
            torch::Tensor rights = torch::empty(indexes.sizes()[0] - 1);
            torch::Tensor bottoms = torch::empty(indexes.sizes()[0] - 1);
            torch::Tensor widths = torch::empty(indexes.sizes()[0] - 1);
            torch::Tensor heights = torch::empty(indexes.sizes()[0] - 1);
            for (size_t i = 0; i < indexes.sizes()[0] - 1; ++i)
            {
                lefts[i] = std::max(dets[indexes[0]][0].item().toFloat(), dets[indexes[i + 1]][0].item().toFloat());
                tops[i] = std::max(dets[indexes[0]][1].item().toFloat(), dets[indexes[i + 1]][1].item().toFloat());
                rights[i] = std::min(dets[indexes[0]][2].item().toFloat(), dets[indexes[i + 1]][2].item().toFloat());
                bottoms[i] = std::min(dets[indexes[0]][3].item().toFloat(), dets[indexes[i + 1]][3].item().toFloat());
                widths[i] = std::max(float(0), rights[i].item().toFloat() - lefts[i].item().toFloat());
                heights[i] = std::max(float(0), bottoms[i].item().toFloat() - tops[i].item().toFloat());
            }
            torch::Tensor overlaps = widths * heights;

            // Filter by IOUs
            torch::Tensor ious = overlaps / (areas.select(0, indexes[0].item().toInt()) + torch::index_select(areas, 0, indexes.slice(0, 1, indexes.sizes()[0])) - overlaps);
            indexes = torch::index_select(indexes, 0, torch::nonzero(ious <= iou_thresh).select(1, 0) + 1);
        }
        keep = keep.toType(torch::kInt64);
        output.push_back(torch::index_select(dets, 0, keep.slice(0, 0, count)));
    }
    return output;
}
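Each tensor returned by non_max_suppression() holds one row per kept box, with columns [left, top, right, bottom, score, class_id]. A small smoke test with invented values (two heavily overlapping boxes plus one distant box; 2 classes, so 7 columns per candidate) should keep exactly two boxes:
// Hypothetical check of non_max_suppression(): 3 candidates in
// (cx, cy, w, h, objectness, class0, class1) layout; boxes 0 and 1 overlap heavily.
torch::Tensor preds = torch::tensor({
    { 100.0f, 100.0f, 50.0f, 50.0f, 0.9f, 0.8f, 0.1f },  // kept
    { 102.0f, 102.0f, 50.0f, 50.0f, 0.8f, 0.7f, 0.2f },  // suppressed: IoU with box 0 is ~0.85
    { 400.0f, 400.0f, 60.0f, 60.0f, 0.9f, 0.1f, 0.9f },  // kept: no overlap with the others
}).unsqueeze(0);                                          // add batch dimension -> [1, 3, 7]
std::vector<torch::Tensor> dets = non_max_suppression(preds, 0.5, 0.35);
std::cout << dets[0] << std::endl;  // expect 2 rows of [l, t, r, b, score, class]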
torch::Tensor unet_data_preprocess(cv::Mat &image)
{
    // Convert into a copy so the caller's BGR image is left untouched for visualization.
    cv::Mat rgb;
    cv::cvtColor(image, rgb, cv::COLOR_BGR2RGB);
    //int w = image.cols;
    //int h = image.rows;
    int newW = 640;
    int newH = 640;
    cv::Mat img_processed;
    cv::resize(rgb, img_processed, cv::Size(newW, newH));
    //cv::imshow("img_processed", img_processed);
    //cv::waitKey(0);

    // HWC uint8 -> NCHW float in [0, 1]. from_blob does not copy the pixel data;
    // the .to(torch::kFloat) below materializes an owning copy before img_processed
    // goes out of scope.
    torch::Tensor imgtransform;
    imgtransform = torch::from_blob(img_processed.data, { 1, newH, newW, 3 }, torch::kByte);
    imgtransform = imgtransform.permute({ 0, 3, 1, 2 });
    imgtransform = imgtransform.to(torch::kFloat);
    imgtransform = imgtransform.div(255.0);
    return imgtransform;
}
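Note that this preprocessing stretches the image to 640x640 with a plain resize, which is why main() below maps the boxes back with simple width/height ratios. yolov5's own Python detect.py instead uses a letterbox (aspect-preserving resize plus gray padding). If you want to reproduce that behavior, a sketch might look like the following, with the caveat that the coordinate mapping in main() would then have to subtract the padding and divide by the scale, e.g. left = (x - padX) / scale:
// Letterbox resize in the style of yolov5's Python preprocessing (a sketch; NOT used
// by the code in this article): scale while keeping the aspect ratio, then pad the
// borders with gray (114) so the output is exactly newW x newH.
cv::Mat letterbox(const cv::Mat& src, int newW, int newH, float& scale, int& padX, int& padY)
{
    scale = std::min((float)newW / src.cols, (float)newH / src.rows);
    int w = cvRound(src.cols * scale);
    int h = cvRound(src.rows * scale);
    cv::Mat resized;
    cv::resize(src, resized, cv::Size(w, h));
    padX = (newW - w) / 2;  // left padding
    padY = (newH - h) / 2;  // top padding
    cv::Mat out(newH, newW, src.type(), cv::Scalar(114, 114, 114));
    resized.copyTo(out(cv::Rect(padX, padY, w, h)));
    return out;
}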
int main(int argc, char* argv[])
{
    std::cout << "cuda::is_available():" << torch::cuda::is_available() << std::endl;
    torch::DeviceType device_type = at::kCPU; // device type
    if (torch::cuda::is_available())
        device_type = at::kCUDA;

    // Loading Module
    torch::jit::script::Module module = torch::jit::load("best.torchscript.pt"); // model path
    module.to(device_type); // move the model to the selected device
    module.eval();          // inference only
    std::vector<std::string> classnames = { "face", "bike", "tree" }; // names of the detection targets

    cv::String path = "image path"; // replace with your image directory / glob pattern
    std::vector<cv::String> m_vImgPath;
    cv::glob(path, m_vImgPath);
    for (int j = 0; j < m_vImgPath.size(); ++j)
    {
        clock_t start = clock();

        // Preparing input tensor
        cv::Mat src = cv::imread(m_vImgPath[j]);
        if (src.empty())
        {
            std::cout << "Read frame failed!" << std::endl;
            return -1;
        }
        clock_t readTime = clock();
        std::cout << "ReadImgTime: " << std::to_string(readTime - start) << "ms ";

        // Move the input onto the same device as the model (no-op on CPU).
        torch::Tensor imgTensor = unet_data_preprocess(src).to(device_type);
        std::vector<torch::jit::IValue> inputs;
        inputs.emplace_back(imgTensor);
        clock_t TransTime = clock();
        std::cout << "TransImgTime: " << std::to_string(TransTime - readTime) << "ms ";

        // preds: [?, 15120, 9]
        torch::jit::IValue output = module.forward(inputs);
        auto preds = output.toTuple()->elements()[0].toTensor();
        clock_t DealTime = clock();
        std::cout << "DealImgTime: " << std::to_string(DealTime - TransTime) << "ms ";

        std::vector<torch::Tensor> dets = non_max_suppression(preds, 0.5, 0.5);
        if (dets.size() > 0)
        {
            // Visualize result
            for (size_t i = 0; i < dets[0].sizes()[0]; ++i)
            {
                float left = dets[0][i][0].item().toFloat() * src.cols / 640;
                float top = dets[0][i][1].item().toFloat() * src.rows / 640;
                float right = dets[0][i][2].item().toFloat() * src.cols / 640;
                float bottom = dets[0][i][3].item().toFloat() * src.rows / 640;
                float score = dets[0][i][4].item().toFloat();
                int classID = dets[0][i][5].item().toInt();
                cv::rectangle(src, cv::Rect(left, top, (right - left), (bottom - top)), cv::Scalar(0, 255, 0), 2);
                cv::putText(src,
                    classnames[classID] + ": " + cv::format("%.2f", score),
                    cv::Point(left, top),
                    cv::FONT_HERSHEY_SIMPLEX, (right - left) / 200, cv::Scalar(0, 255, 0), 2);
            }
        }
        clock_t postProcessTime = clock();
        std::cout << "postProcessTime: " << std::to_string(postProcessTime - DealTime) << "ms ";

        clock_t end = clock();
        double time = end - start;
        std::cout << "-[INFO] Frame:" << std::to_string(time) << std::endl;
    }
    return 0;
}
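The loop above draws the boxes onto src but never displays or writes the result anywhere. If you want to actually see the detections per image, something like the following could be placed at the end of the loop body (window title and output file name are placeholders):
// Optional, not in the original code: show and/or save the annotated frame.
cv::imshow("yolov5 detections", src);
cv::waitKey(0);  // wait for a key press per image
// cv::imwrite("result_" + std::to_string(j) + ".jpg", src);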
7. Run results