提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
文章目录
- 一、模型训练
- 二、模型导出
- 三、Libtorch部署
- 总结
本文是自己通过libtorch部署yolov5后,做个笔记记录一下,方便以后使用,同时也给大家分享一下!
一、模型训练
参考这篇文章http://t.csdn.cn/2kLAD
二、模型导出
对于训练好的模型,yolov5官方提供了export.py这样一个代码,用于将.pt文件导出为.torchscript.pt、onnx等用于模型部署的文件,我们只需对里面的部分代码进行修改就行。
def parse_opt():
    """Build and parse the command-line options for YOLOv5 model export.

    Returns:
        argparse.Namespace: parsed export options (weights path, image size,
        target formats, etc.). Relies on module-level ``ROOT``, ``FILE`` and
        ``print_args`` from the surrounding export.py.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, default=ROOT / 'data/mydata.yaml', help='dataset.yaml path')
    parser.add_argument('--weights', type=str, default='./runs/train/exp6/weights/best.pt', help='weights path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640, 640], help='image (h, w)')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--half', action='store_true', help='FP16 half-precision export')
    parser.add_argument('--inplace', action='store_true', help='set YOLOv5 Detect() inplace=True')
    parser.add_argument('--train', action='store_true', help='model.train() mode')
    parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile')
    parser.add_argument('--int8', action='store_true', help='CoreML/TF INT8 quantization')
    parser.add_argument('--dynamic', action='store_true', help='ONNX/TF: dynamic axes')
    parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')
    parser.add_argument('--opset', type=int, default=13, help='ONNX: opset version')
    parser.add_argument('--topk-per-class', type=int, default=100, help='TF.js NMS: topk per class to keep')
    parser.add_argument('--topk-all', type=int, default=100, help='TF.js NMS: topk for all classes to keep')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='TF.js NMS: IoU threshold')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='TF.js NMS: confidence threshold')
    parser.add_argument('--include', nargs='+', default=['torchscript', 'onnx'],
                        help='available formats are (torchscript, onnx, coreml, saved_model, pb, tflite, tfjs)')
    opt = parser.parse_args()
    print_args(FILE.stem, opt)
    return opt
在这段代码里面,我们需要将'data'、'weights'、'device'和'include'修改成自己的,然后点击运行,就可以生成相应的文件。
三、Libtorch部署
1,在opencv官网和pytorch官网下载相应的包,并解压!
2,配置环境变量
3,新建vs工程文件,并且对其属性进行配置,
在属性里面VC++目录内的包含目录进行以下配置
在属性里面VC++目录内的库目录进行以下配置
在属性的链接器里面的 输入的附加依赖项进行以下的配置
opencv_world455.lib
c10.lib
torch.lib
torch_cpu.lib
libprotobuf.lib
kernel32.lib
user32.lib
gdi32.lib
winspool.lib
shell32.lib
ole32.lib
oleaut32.lib
uuid.lib
comdlg32.lib
advapi32.lib
对于libtorch代码的实现参考这篇博文http://t.csdn.cn/DjiFG
以下是我自己的实现代码
#include <opencv2/opencv.hpp>
#include <torch/script.h>
#include <algorithm>
#include <iostream>
#include <time.h>
#include <vector>
#include "preprocess.h"
#include<time.h>
// Per-image non-maximum suppression on raw YOLOv5 predictions.
//
// preds: assumed [batch, num_boxes, 5 + num_classes], each row being
//        (center_x, center_y, w, h, objectness, class scores...) — inferred
//        from the indexing below; confirm against the exported model.
// score_thresh: minimum (objectness * best class score) to keep a box.
// iou_thresh:   boxes overlapping an already-kept box above this IoU are dropped.
//
// Returns one [kept, 6] tensor per image: (left, top, right, bottom, score, class).
std::vector<torch::Tensor> non_max_suppression(torch::Tensor preds, float score_thresh = 0.5, float iou_thresh = 0.5)
{
    std::vector<torch::Tensor> output;
    for (size_t i = 0; i < preds.sizes()[0]; ++i)
    {
        torch::Tensor pred = preds.select(0, i);  // predictions for image i
        // Filter by scores: combined score = objectness * best class probability.
        torch::Tensor scores = pred.select(1, 4) * std::get<0>(torch::max(pred.slice(1, 5, pred.sizes()[1]), 1));
        pred = torch::index_select(pred, 0, torch::nonzero(scores > score_thresh).select(1, 0));
        if (pred.sizes()[0] == 0) continue;  // nothing above threshold for this image
        // (center_x, center_y, w, h) to (left, top, right, bottom).
        // The in-place order matters: columns 2/3 still hold w/h when columns
        // 0/1 are rewritten, so right = left + w and bottom = top + h.
        pred.select(1, 0) = pred.select(1, 0) - pred.select(1, 2) / 2;
        pred.select(1, 1) = pred.select(1, 1) - pred.select(1, 3) / 2;
        pred.select(1, 2) = pred.select(1, 0) + pred.select(1, 2);
        pred.select(1, 3) = pred.select(1, 1) + pred.select(1, 3);
        // Computing scores and classes: overwrite col 4 with the combined
        // score and col 5 with the argmax class index.
        std::tuple<torch::Tensor, torch::Tensor> max_tuple = torch::max(pred.slice(1, 5, pred.sizes()[1]), 1);
        pred.select(1, 4) = pred.select(1, 4) * std::get<0>(max_tuple);
        pred.select(1, 5) = std::get<1>(max_tuple);
        torch::Tensor dets = pred.slice(1, 0, 6);  // (l, t, r, b, score, class)
        // `keep` is allocated as float and cast to kInt64 after the loop;
        // it collects the row indices of the boxes that survive NMS.
        torch::Tensor keep = torch::empty({ dets.sizes()[0] });
        torch::Tensor areas = (dets.select(1, 3) - dets.select(1, 1)) * (dets.select(1, 2) - dets.select(1, 0));
        // Sort by score descending (third arg = descending); greedy NMS below.
        std::tuple<torch::Tensor, torch::Tensor> indexes_tuple = torch::sort(dets.select(1, 4), 0, 1);
        torch::Tensor v = std::get<0>(indexes_tuple);
        torch::Tensor indexes = std::get<1>(indexes_tuple);
        int count = 0;
        while (indexes.sizes()[0] > 0)
        {
            keep[count] = (indexes[0].item().toInt());  // keep the current best box
            count += 1;
            // Computing overlaps of the best box against all remaining boxes.
            torch::Tensor lefts = torch::empty(indexes.sizes()[0] - 1);
            torch::Tensor tops = torch::empty(indexes.sizes()[0] - 1);
            torch::Tensor rights = torch::empty(indexes.sizes()[0] - 1);
            torch::Tensor bottoms = torch::empty(indexes.sizes()[0] - 1);
            torch::Tensor widths = torch::empty(indexes.sizes()[0] - 1);
            torch::Tensor heights = torch::empty(indexes.sizes()[0] - 1);
            // NOTE(review): this inner `i` shadows the outer batch index `i`;
            // harmless here but worth renaming.
            for (size_t i = 0; i < indexes.sizes()[0] - 1; ++i)
            {
                // Intersection rectangle between box indexes[0] and indexes[i+1].
                lefts[i] = std::max(dets[indexes[0]][0].item().toFloat(), dets[indexes[i + 1]][0].item().toFloat());
                tops[i] = std::max(dets[indexes[0]][1].item().toFloat(), dets[indexes[i + 1]][1].item().toFloat());
                rights[i] = std::min(dets[indexes[0]][2].item().toFloat(), dets[indexes[i + 1]][2].item().toFloat());
                bottoms[i] = std::min(dets[indexes[0]][3].item().toFloat(), dets[indexes[i + 1]][3].item().toFloat());
                // Clamp to 0 so disjoint boxes contribute zero overlap.
                widths[i] = std::max(float(0), rights[i].item().toFloat() - lefts[i].item().toFloat());
                heights[i] = std::max(float(0), bottoms[i].item().toFloat() - tops[i].item().toFloat());
            }
            torch::Tensor overlaps = widths * heights;
            // FIlter by IOUs: keep only boxes whose IoU with the best box is
            // below the threshold; +1 maps back into `indexes` (slot 0 was consumed).
            torch::Tensor ious = overlaps / (areas.select(0, indexes[0].item().toInt()) + torch::index_select(areas, 0, indexes.slice(0, 1, indexes.sizes()[0])) - overlaps);
            indexes = torch::index_select(indexes, 0, torch::nonzero(ious <= iou_thresh).select(1, 0) + 1);
        }
        keep = keep.toType(torch::kInt64);  // float buffer -> integer row indices
        output.push_back(torch::index_select(dets, 0, keep.slice(0, 0, count)));
    }
    return output;
}
// Entry point: load the exported TorchScript YOLOv5 model, run CPU inference
// on every third pair of video frames, and draw the detections.
int main()
{
    clock_t start, finish;
    double totaltime;

    // Loading Module exported by export.py (TorchScript, CPU).
    torch::jit::script::Module module = torch::jit::load("best.torchscript.pt");
    torch::DeviceType device_type = at::kCPU;
    module.to(device_type);
    module.eval();  // inference mode; hoisted out of the frame loop

    // Class names, one per line, in the same order as the training labels.
    std::vector<std::string> classnames;
    std::ifstream f("coco.names");
    std::string name = "";
    while (std::getline(f, name))
    {
        classnames.push_back(name);
    }

    std::string video = "C:\\Users\\zhaoyong\\Desktop\\test.mp4";
    cv::VideoCapture cap = cv::VideoCapture(video);
    cv::Mat frame, img;
    int i = 0;
    while (cap.isOpened())
    {
        cap.read(frame);
        if (frame.empty())
        {
            // End of video or decode failure — stop instead of crashing below.
            std::cout << "Read frame failed!" << std::endl;
            break;
        }
        if (i % 3 != 0)  // run detection on two out of every three frames
        {
            float r0 = frame.cols / 640.0;  // original-width / network-width ratio
            start = clock();
            // Preparing input tensor: aspect-preserving affine warp to 640x640.
            int network_input_size[2] = { 640, 640 };
            CenterScale matee = getAffineTransImage(frame, network_input_size);
            if (cv::waitKey(1) == 27) break;  // ESC exits
            cv::cvtColor(matee.input, img, cv::COLOR_BGR2RGB);  // BGR -> RGB
            img.convertTo(img, CV_32FC3, 1.0f / 255.0f);        // normalization 1/255
            auto imgTensor = torch::from_blob(img.data, { 1, img.rows, img.cols, img.channels() }).to(device_type);
            imgTensor = imgTensor.permute({ 0, 3, 1, 2 }).contiguous(); // BHWC -> BCHW
            // preds: [?, 15120, 9] (batch, boxes, 4 coords + objectness + classes)
            torch::jit::IValue output = module.forward({ imgTensor });
            auto preds = output.toTuple()->elements()[0].toTensor();
            std::vector<torch::Tensor> dets = non_max_suppression(preds, 0.25, 0.5);
            if (dets.size() > 0)
            {
                // Visualize result: map 640x640 letterboxed coords back to the frame.
                for (size_t j = 0; j < dets[0].sizes()[0]; ++j)
                {
                    float left = dets[0][j][0].item().toFloat() * r0;
                    float top = (dets[0][j][1].item().toFloat() - (640.0 - frame.rows / r0) / 2) * r0;
                    float right = dets[0][j][2].item().toFloat() * frame.cols / 640;
                    float bottom = (dets[0][j][3].item().toFloat() - (640.0 - frame.rows / r0) / 2) * r0;
                    float score = dets[0][j][4].item().toFloat();
                    int classID = dets[0][j][5].item().toInt();
                    // Guard against a class index outside the loaded name list.
                    std::string label = (classID >= 0 && classID < (int)classnames.size())
                        ? classnames[classID] : std::to_string(classID);
                    cv::rectangle(frame, cv::Rect(left, top, (right - left), (bottom - top)), cv::Scalar(0, 255, 0), 2);
                    cv::putText(frame,
                        label + ": " + cv::format("%.2f", score),
                        cv::Point(left, top),
                        cv::FONT_HERSHEY_SIMPLEX, (right - left) / 200, cv::Scalar(0, 255, 0), 2);
                }
            }
            finish = clock();
            totaltime = (double)(finish - start) / CLOCKS_PER_SEC;
            ++i;
            std::cout << totaltime << std::endl;  // per-frame inference time (s)
            cv::imshow("nam", frame);
            cv::waitKey(1);
        }
        else
        {
            // Skipped frame: just display it.
            ++i;
            std::cout << i << std::endl;
            cv::imshow("nam", frame);
            cv::waitKey(1);
        }
    }
    return 0;
}
再附上对图片进行按比例缩放并填充(letterbox)的代码,可以参考这篇博文
以下是我的代码
// Rotate `src_point` counter-clockwise by `rot_rad` radians about the origin.
cv::Point2f get_dir(cv::Point2f src_point, float rot_rad)
{
    const float sn = sin(rot_rad);
    const float cs = cos(rot_rad);
    // Standard 2-D rotation matrix applied to (x, y).
    return { src_point.x * cs - src_point.y * sn,
             src_point.x * sn + src_point.y * cs };
}
// Build the 2x3 affine matrix mapping a `scale`-sized square centered at
// `center` (rotated by `rot` degrees) onto an output_size[0] x output_size[1]
// image. `inv` returns the inverse mapping (output -> source).
cv::Mat get_affine_transform(cv::Point2f center, cv::Point2f scale, float rot,
    int output_size[], bool inv)
{
    const float src_w = scale.x;
    const float dst_w = (float)output_size[0];
    const float dst_h = (float)output_size[1];
    const float half_1 = 0.5;
    const float rot_rad = 3.1415926 * rot / 180;  // degrees -> radians

    // "Up" direction in the source (rotated) and destination images.
    const cv::Point2f src_dir = get_dir({ 0, src_w * -half_1 }, rot_rad);
    const cv::Point2f dst_dir = { 0, dst_w * -half_1 };

    // Three corresponding point pairs define the affine transform:
    // center, center + up-vector, and a third point at 90 degrees.
    cv::Point2f src[3];
    cv::Point2f dst[3];
    src[0] = { center.x, center.y };
    src[1] = { center.x + src_dir.x, center.y + src_dir.y };
    src[2] = get_3rd_point(src[0], src[1]);
    dst[0] = { dst_w * half_1, dst_h * half_1 };
    dst[1] = { dst_w * half_1 + dst_dir.x, dst_h * half_1 + dst_dir.y };
    dst[2] = get_3rd_point(dst[0], dst[1]);

    return inv ? cv::getAffineTransform(dst, src)
               : cv::getAffineTransform(src, dst);
}
// Aspect-preserving preprocessing: affine-warp `img` into the
// network_input_size[0] x network_input_size[1] network input (letterbox-style,
// out-of-image area filled with the warp border value).
// Returns a CenterScale carrying the warped image plus the center/scale used,
// so detections can be mapped back to the original frame.
CenterScale getAffineTransImage(const cv::Mat& img, int network_input_size[])
{
    const int width = img.cols;
    const int height = img.rows;

    CenterScale meta;
    meta.center = { width / (float)2.0, height / (float)2.0 };
    // Scale by the longer side so the whole image fits inside the square input.
    const float s = std::max(height, width) * 1.0f;
    meta.scale = { s, s };
    meta.r = 0.0;

    const float rotate = 0;  // no rotation for detection preprocessing
    cv::Mat trans = get_affine_transform(meta.center, meta.scale, rotate, network_input_size, false);
    // cv::Mat(rows, cols, ...): rows = height = network_input_size[1],
    // cols = width = network_input_size[0]. (The original swapped them —
    // harmless only while the network input is square.)
    cv::Mat transed_dstImage(network_input_size[1], network_input_size[0], img.type());
    cv::warpAffine(img, transed_dstImage, trans, transed_dstImage.size());
    meta.input = transed_dstImage;
    return meta;
}
// Third vertex for cv::getAffineTransform: take the offset a - b, rotate it
// 90 degrees, and add it to b — giving a point that fixes the transform's
// orientation. (Restores the closing brace lost in the original paste.)
cv::Point2f get_3rd_point(cv::Point2f a, cv::Point2f b)
{
    cv::Point2f direct = { a.x - b.x, a.y - b.y };
    direct = { b.x - direct.y, b.y + direct.x };  // 90-degree rotation of the offset
    return direct;
}
以下是我的文件配置目录
总结
libtorch在CPU下的yolov5推理单张图片大概0.2秒,速度比较慢。