YOLO系列PB模型推理C++,支持V3、V4
模型转换(weights->pb)
weights code: https://github.com/AlexeyAB/darknet
转换code: https://github.com/TNTWEN/OpenVINO-YOLOV4
weights转换为pb模型时,参照转换code的py文件操作即可。
inference cpp代码
// 编译tensorflow详见其他博客
#include <fstream>
#include <sstream>
#include <iostream>
#include <utility>
#include <vector>
#include <stdio.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <vector>
#include <sys/stat.h>
#include <unistd.h>
#include <exception>
#include <assert.h>
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/util/command_line_flags.h"
// These are all common classes it's handy to reference with no namespace.
using tensorflow::Flag;
using tensorflow::Tensor;
using tensorflow::Status;
using tensorflow::string;
using tensorflow::int32;
using tensorflow::ops::Softmax;
// Debug helper: streams tensor T, viewed as a rank-d float tensor, to std::cout.
// Note that the expansion already ends with a semicolon.
#define printTensor(T, d) \
std::cout<< (T).tensor<float, (d)>() << std::endl;
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
// Channel count used as the last dimension of the model input tensor shape.
#define IMG_CHANNELS 3
// Class names, one per line of the classes file; filled in main() and
// indexed by class id in drawPred().
std::vector<string> classes;
// Numeric values parsed line by line from the config file in main().
std::vector<float>config;
// Splits `s` on every occurrence of the delimiter `c` and appends the pieces
// to `v` (existing elements of `v` are kept).
//
// - Empty pieces before or between delimiters are kept ("/a" -> "", "a").
// - A trailing empty piece is dropped ("a/" -> "a").
// - An empty `s` appends nothing.
// - An empty delimiter is treated as "delimiter not found": a non-empty `s`
//   is appended unchanged.
void SplitString(const string& s, std::vector<string>& v, const string& c)
{
    // find("") matches at the current position without advancing, which
    // would loop forever and grow `v` without bound.
    if (c.empty()) {
        if (!s.empty())
            v.push_back(s);
        return;
    }

    string::size_type begin = 0;
    for (string::size_type hit = s.find(c); hit != string::npos; hit = s.find(c, begin)) {
        v.push_back(s.substr(begin, hit - begin));
        begin = hit + c.size();
    }

    // Remainder after the last delimiter (skipped when it would be empty).
    if (begin != s.length())
        v.push_back(s.substr(begin));
}
// Appends "<path>/<entry>" to `filenames` for every entry of directory `path`
// except "." and "..". Entries are appended in the order readdir() returns
// them. If the directory cannot be opened, a message is printed to std::cout
// and `filenames` is left untouched.
void GetFileNames(string path,std::vector<string>& filenames)
{
    DIR* dir = opendir(path.c_str());
    if (dir == nullptr) {
        std::cout<<"Folder doesn't Exist!"<<std::endl;
        return;
    }

    for (struct dirent* entry = readdir(dir); entry != nullptr; entry = readdir(dir)) {
        const bool is_self = strcmp(entry->d_name, ".") == 0;
        const bool is_parent = strcmp(entry->d_name, "..") == 0;
        if (is_self || is_parent)
            continue;
        filenames.push_back(path + "/" + entry->d_name);
    }

    closedir(dir);
}
// Reads a binary GraphDef from `graph_file_name` and creates a session that
// can run it.
//
// On success `*session` owns the new session and an OK status is returned.
// Returns NotFound if the graph file cannot be read or parsed (the underlying
// error text is appended), Internal if no session could be created, or the
// status reported by Session::Create().
Status LoadGraph(const string& graph_file_name,
                 std::unique_ptr<tensorflow::Session>* session) {
    tensorflow::GraphDef graph_def;
    Status load_graph_status =
        ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def);
    if (!load_graph_status.ok()) {
        return tensorflow::errors::NotFound("Failed to load compute graph at '",
                                            graph_file_name, "': ",
                                            load_graph_status.ToString());
    }

    // NewSession(options) reports failure by returning nullptr, so the
    // pointer must be checked before it is dereferenced.
    session->reset(tensorflow::NewSession(tensorflow::SessionOptions()));
    if (!*session) {
        return tensorflow::errors::Internal("Failed to create a TensorFlow session");
    }

    return (*session)->Create(graph_def);
}
// Scales `input` to fit inside a width x height canvas while keeping its
// aspect ratio, then pads the remaining border with gray (128,128,128) so the
// result is exactly width x height. When the padding is odd, the extra pixel
// goes to the bottom / right side.
cv::Mat resizeKeepAspectRatio(const cv::Mat &input, int width, int height)
{
    // Height obtained when fitting to the target width, and width obtained
    // when fitting to the target height.
    const double fitted_height = width * (input.rows/(double)input.cols);
    const double fitted_width = height * (input.cols/(double)input.rows);

    // Fit to the width when the result is not too tall, otherwise to the height.
    const cv::Size scaled_size = (fitted_height <= height)
                                     ? cv::Size(width, fitted_height)
                                     : cv::Size(fitted_width, height);

    cv::Mat output;
    cv::resize(input, output, scaled_size);

    const int pad_rows = height - output.rows;
    const int pad_cols = width - output.cols;
    const int top = pad_rows / 2;
    const int down = (pad_rows + 1) / 2;
    const int left = pad_cols / 2;
    const int right = (pad_cols + 1) / 2;

    const cv::Scalar gray(128,128,128);
    cv::copyMakeBorder(output, output, top, down, left, right, cv::BORDER_CONSTANT, gray);
    return output;
}
// Copies the pixels of a 3-channel image into a pre-allocated float tensor,
// converting each value to float without scaling.
//
// `mat`       : source image; must be non-empty and have 3 channels.
// `outTensor` : destination; must be DT_FLOAT and hold exactly
//               mat.rows * mat.cols * 3 elements. It is filled in row-major
//               (row, column, channel) order.
//
// Returns InvalidArgument when the image and tensor do not match, Internal
// if OpenCV did not write into the tensor buffer, OK otherwise.
//
// The previous implementation additionally built a graph and a session on
// every call just to copy the tensor onto itself through an Identity op; the
// pixels are already in the tensor after convertTo(), so that round trip is
// dropped.
Status readTensorFromMat(const cv::Mat &mat, Tensor &outTensor) {
    if (mat.empty()) {
        return tensorflow::errors::InvalidArgument("readTensorFromMat: input image is empty");
    }
    if (mat.channels() != 3) {
        return tensorflow::errors::InvalidArgument(
            "readTensorFromMat: expected a 3-channel image, got ", mat.channels(), " channel(s)");
    }
    if (outTensor.dtype() != tensorflow::DT_FLOAT) {
        return tensorflow::errors::InvalidArgument("readTensorFromMat: output tensor must be DT_FLOAT");
    }

    // The Mat below aliases the tensor buffer, so a size mismatch would make
    // convertTo() write past the end of the tensor.
    const long long pixel_values = static_cast<long long>(mat.rows) * mat.cols * 3;
    if (static_cast<long long>(outTensor.NumElements()) != pixel_values) {
        return tensorflow::errors::InvalidArgument(
            "readTensorFromMat: tensor holds ", outTensor.NumElements(),
            " elements but the image needs ", pixel_values);
    }

    // Wrap the tensor memory in a Mat header so OpenCV converts in place.
    float *p = outTensor.flat<float>().data();
    cv::Mat fakeMat(mat.rows, mat.cols, CV_32FC3, p);
    mat.convertTo(fakeMat, CV_32FC3, 1.f);

    // convertTo() reallocates its destination when size/type differ; in that
    // case nothing was written into the tensor.
    if (fakeMat.data != reinterpret_cast<unsigned char*>(p)) {
        return tensorflow::errors::Internal("readTensorFromMat: conversion did not write into the tensor buffer");
    }
    return Status::OK();
}
// Draws one detection on `frame`: the bounding box plus a label above it.
//
// `classId` : index into the global `classes` list; when it is not a valid
//             index (or the list is empty) only the confidence is shown.
// `conf`    : confidence, printed with two decimals.
// `left`, `top`, `right`, `bottom` : box corners in `frame` pixel coordinates.
void drawPred(int classId, float conf, int left, int top, int right, int bottom, cv::Mat& frame)
{
    // Draw a rectangle displaying the bounding box
    cv::rectangle(frame, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(255, 178, 50), 2);

    // Build the label: "<class name>:<confidence>" or just "<confidence>".
    string label = cv::format("%.2f", conf);
    // The class list may be shorter than the number of classes the model
    // predicts, so the index is checked before use.
    if (classId >= 0 && static_cast<size_t>(classId) < classes.size())
    {
        label = classes[classId] + ":" + label;
    }

    // Display the label at the top of the bounding box. The text size is
    // measured at scale 0.5 and enlarged by 1.5 for the background because
    // the text itself is drawn at scale 0.75.
    int baseLine;
    cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    top = cv::max(top, labelSize.height);
    cv::rectangle(frame, cv::Point(left, top - round(1.5*labelSize.height)),
                  cv::Point(left + round(1.5*labelSize.width), top + baseLine), cv::Scalar(255, 255, 255), cv::FILLED);
    cv::putText(frame, label, cv::Point(left, top), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0,0,0),1);
}
// Filters the raw network output, maps the boxes back to `frame` coordinates,
// removes overlapping boxes with non-maxima suppression, draws the survivors
// on `frame` and writes them to the text file `txt_str`.
//
// `frame`         : original (un-padded) image; boxes are drawn on it.
// `outs`          : network outputs; each is read as a 2-D float matrix with
//                   one candidate per row: columns 0-3 are the box corners
//                   (x0, y0, x1, y1) in network-input pixels, column 4 is the
//                   value compared against `bboxThreshold`, columns 5.. are
//                   the per-class scores. Matrices that are not single-channel
//                   float or have no class columns are skipped.
// `bboxThreshold` : minimum column-4 value to keep a row; also passed to
//                   NMSBoxes as its score threshold.
// `nmsThreshold`  : overlap threshold passed to NMSBoxes.
// `input_size`    : side length of the network input; the un-padding maths
//                   uses this single value for both axes.
// `txt_str`       : output path; one line "classId conf x0 y0 x1 y1" per box.
//                   If it cannot be opened a warning is printed and the boxes
//                   are still drawn.
void postprocess(cv::Mat& frame, const std::vector<cv::Mat>& outs,float bboxThreshold,float nmsThreshold, int input_size,string txt_str)
{
    // Without a valid frame/input size the scale below would divide by zero.
    if (frame.empty() || input_size <= 0)
        return;

    // Scale and padding that were applied when the frame was letterboxed to
    // the network input; identical for every box, so computed once.
    const float final_ratio = std::min((float)input_size / frame.cols, (float)input_size / frame.rows);
    const int padx = static_cast<int>(0.5f * (input_size - final_ratio * frame.cols));
    const int pady = static_cast<int>(0.5f * (input_size - final_ratio * frame.rows));

    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    for (size_t i = 0; i < outs.size(); ++i)
    {
        const cv::Mat& out = outs[i];
        if (out.type() != CV_32FC1 || out.cols <= 5)
            continue;

        // Scan through all the bounding boxes output from the network and keep
        // only the ones above the threshold. Assign the box's class label as
        // the class with the highest score for the box.
        for (int j = 0; j < out.rows; ++j)
        {
            // ptr<float>(j) honours the row stride, so non-continuous
            // matrices are handled as well.
            const float* data = out.ptr<float>(j);
            if (!(data[4] > bboxThreshold))
                continue;

            // Get the value and location of the maximum score
            cv::Mat scores = out.row(j).colRange(5, out.cols);
            cv::Point classIdPoint;
            double confidence;
            cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);

            // Recover the box in frame coordinates: remove the padding, then
            // undo the scaling (each step truncates to int).
            const int x0 = static_cast<int>((static_cast<int>(data[0]) - padx) / final_ratio);
            const int y0 = static_cast<int>((static_cast<int>(data[1]) - pady) / final_ratio);
            const int x1 = static_cast<int>((static_cast<int>(data[2]) - padx) / final_ratio);
            const int y1 = static_cast<int>((static_cast<int>(data[3]) - pady) / final_ratio);

            classIds.push_back(classIdPoint.x);
            confidences.push_back((float)confidence);
            boxes.push_back(cv::Rect(x0, y0, x1 - x0, y1 - y0));
        }
    }

    // Perform non maximum suppression to eliminate redundant overlapping boxes
    // with lower confidences
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, bboxThreshold, nmsThreshold, indices);

    FILE *fp = fopen(txt_str.c_str(),"w");
    if (NULL == fp)
    {
        std::cerr << "postprocess: cannot open '" << txt_str
                  << "' for writing; detections are drawn but not saved" << std::endl;
    }

    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];
        const cv::Rect box = boxes[idx];
        drawPred(classIds[idx], confidences[idx], box.x, box.y,
                 box.x + box.width, box.y + box.height, frame);
        if (NULL != fp)
        {
            fprintf(fp,"%d %.2f %d %d %d %d\n",classIds[idx],confidences[idx],box.x, box.y,
                    box.x + box.width, box.y + box.height);
        }
    }

    if (NULL != fp)
        fclose(fp);
}
int main(int argc, char* argv[]) {
string video_path;
string imgs_path;
string pb_path;
string config_file;
string classesFile;
bool img_detect_flag = false;
bool video_detect_flag = false;
int input_size_w,input_size_h;
float confThreshold;
float nmsThreshold;
if (argc == 9 && std::string(argv[1]) == "-v3") {
pb_path = std::string(argv[2]);
if(std::string(argv[3]) == "-n"){
classesFile = std::string(argv[4]);
if(std::string(argv[5]) == "-c"){
config_file = std::string(argv[6]);
if(std::string(argv[7]) == "-i"){
img_detect_flag = true;
imgs_path = std::string(argv[8]);
}
else if(std::string(argv[7]) == "-v"){
video_detect_flag = true;
video_path = std::string(argv[8]);
}
}
}
}
else if (argc == 9 && std::string(argv[1]) == "-v4") {
pb_path = std::string(argv[2]);
if(std::string(argv[3]) == "-n"){
classesFile = std::string(argv[4]);
if(std::string(argv[5]) == "-c"){
config_file = std::string(argv[6]);
if(std::string(argv[7]) == "-i"){
img_detect_flag = true;
imgs_path = std::string(argv[8]);
}
else if(std::string(argv[7]) == "-v"){
video_detect_flag = true;
video_path = std::string(argv[8]);
}
}
}
}
else {
std::cerr << "arguments not right!" << std::endl;
std::cerr << "./yolo_pb -v3 ./yolov3.pb -n ./coco.names -c ./config.ini -i ./imgs // input the yolov3-pb model and the images path to run inference" << std::endl;
std::cerr << "./yolo_pb -v3 ./yolov3.pb -n ./coco.names -c ./config.ini -v ./video/test.mp4 // input yolov3-pb model and the video path to run inference" << std::endl;
std::cerr << "./yolo_pb -v4 ./yolov4.pb -n ./coco.names -c ./config.ini -i ./imgs // input the yolov4-pb model and the images path to run inference" << std::endl;
std::cerr << "./yolo_pb -v4 ./yolov4.pb -n ./coco.names -c ./config.ini -v ./video/test.mp4 // input yolov4-pb model and the video path to run inference" << std::endl;
return -1;
}
string input_layer = "inputs"; //input ops
string final_out = "output_boxes"; //output ops
string root_dir = "";
std::ifstream infile;
infile.open(config_file.data());
assert(infile.is_open() && "Unable to load config file.");
try{
string s;
while(getline(infile,s))
{
config.push_back(std::stof(s));
}
infile.close();
confThreshold = config[0];
nmsThreshold = config[1];
input_size_w = (int)config[2];
input_size_h = (int)config[3];
}
catch(const char* msg)
{
std::cout<<"the config file is not right, this program has stopped"<<msg<<std::endl;
return -1;
}
std::ifstream ifs(classesFile.c_str());
assert(ifs.is_open() && "Unable to load classes file.");
string line;
while (getline(ifs, line)) classes.push_back(line);
// We need to call this to set up global state for TensorFlow.
/*tensorflow::port::InitMain(argv[0], &argc, &argv);
if (argc > 1) {
LOG(ERROR) << "Unknown argument " << argv[1] << "\n";
return -1;
}*/
// First we load and initialize the model.
std::unique_ptr<tensorflow::Session> session;
string graph_path = tensorflow::io::JoinPath(root_dir, pb_path);
Status load_graph_status = LoadGraph(graph_path, &session);
if (!load_graph_status.ok()) {
LOG(ERROR) << load_graph_status;
return -1;
}
// detect the imgs
if(img_detect_flag)
{
std::cout<<"detect img"<<std::endl;
std::vector<string>imgs_file;
GetFileNames(imgs_path,imgs_file);
std::vector<string> v1;
std::vector<string> v2;
for(int i=0;i<imgs_file.size();i++)
{
cv::Mat srcImage = cv::imread(imgs_file[i]);
SplitString(imgs_file[i], v1,".");
SplitString(v1[v1.size()-2], v2,"/");
string txt_str = "./outputs/" + v2[v2.size()-1] + "_detected.txt";
string img_str = "./outputs/" + v2[v2.size()-1] + "_detected.jpg";
cv::Mat rgbImage;
cv::cvtColor(srcImage, rgbImage, cv::COLOR_BGR2RGB);
cv::Mat padImage = resizeKeepAspectRatio(rgbImage, input_size_w, input_size_h);
Tensor resized_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({1, input_size_w,
input_size_h, IMG_CHANNELS}));
Status read_tensor_status = readTensorFromMat(padImage, resized_tensor);
if (!read_tensor_status.ok()) {
LOG(ERROR) << read_tensor_status;
return -1;
}
// Actually run the image through the model.
std::vector<Tensor> outputs;
Status run_status = session->Run({{input_layer, resized_tensor}},
{final_out}, {}, &outputs);
if (!run_status.ok()) {
LOG(ERROR) << "Running model failed: " << run_status;
return -1;
}
//std::cout << outputs[0].shape() << "\n";
float *p = outputs[0].flat<float>().data();
cv::Mat result(outputs[0].dim_size(1), outputs[0].dim_size(2), CV_32FC(1), p);
std::vector<cv::Mat> outs;
outs.push_back (result);
postprocess(rgbImage, outs,confThreshold,nmsThreshold,input_size_w,txt_str);
cv::cvtColor(rgbImage, srcImage , cv::COLOR_RGB2BGR);
cv::imwrite(img_str, srcImage);
}
v1.clear();
v2.clear();
return 1;
}
// detect the video
else if(video_detect_flag)
{
cv::VideoCapture capture;
cv::Mat srcImage;
capture.open(video_path);
if(!capture.isOpened())
{
printf("can not open ...\n");
return -1;
}
int num = 1;
while (capture.read(srcImage))
{
string txt_str = "./outputs/"+ std::to_string(num) + "_detected.txt";
string img_str = "./outputs/"+ std::to_string(num) + "_detected.jpg";
cv::Mat rgbImage;
cv::cvtColor(srcImage, rgbImage, cv::COLOR_BGR2RGB);
cv::Mat padImage = resizeKeepAspectRatio(rgbImage, input_size_w, input_size_h);
Tensor resized_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({1, input_size_w, input_size_h, IMG_CHANNELS}));
Status read_tensor_status = readTensorFromMat(padImage, resized_tensor);
if (!read_tensor_status.ok()) {
LOG(ERROR) << read_tensor_status;
return -1;
}
// Actually run the image through the model.
std::vector<Tensor> outputs;
Status run_status = session->Run({{input_layer, resized_tensor}},
{final_out}, {}, &outputs);
if (!run_status.ok()) {
LOG(ERROR) << "Running model failed: " << run_status;
return -1;
}
//std::cout << outputs[0].shape() << "\n";
float *p = outputs[0].flat<float>().data();
cv::Mat result(outputs[0].dim_size(1), outputs[0].dim_size(2), CV_32FC(1), p);
std::vector<cv::Mat> outs;
outs.push_back (result);
postprocess(rgbImage, outs,confThreshold,nmsThreshold,input_size_w, txt_str);
cv::cvtColor(rgbImage, srcImage , cv::COLOR_RGB2BGR);
cv::imwrite(img_str, srcImage );
num++;
}
capture.release();
return 1;
}
}
CMakeLists.txt
cmake_minimum_required(VERSION 3.5)
project(opencv_yolo)

# Libraries go to ./lib, the executable next to the sources.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

# Build as strict C++11 (replaces the raw "-std=c++11" compile definition).
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# option() takes a help string before the default value.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link against the static CUDA runtime" OFF)

# Default to an optimized build; "Relaese" was a typo that matched no
# configuration. An explicit -DCMAKE_BUILD_TYPE=... is respected.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

find_package(CUDA REQUIRED)
message(STATUS "CUDA版本: ${CUDA_VERSION}")
message(STATUS " 头文件目录:${CUDA_INCLUDE_DIRS}")
message(STATUS " 库文件列表:${CUDA_LIBRARIES}")
# The variable read by FindCUDA is CUDA_NVCC_FLAGS; -gencode takes a single
# "arch=...,code=..." argument.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++11;-g;-G;-gencode;arch=compute_75,code=sm_75)

set(OpenCV_DIR /opt/opencv440/lib/cmake/opencv4/)
find_package(OpenCV REQUIRED)
include_directories(
  ../include
  ${OpenCV_INCLUDE_DIRS})
message(STATUS "OpenCV_DIR: ${OpenCV_DIR}")
message(STATUS ${OpenCV_LIBS})

# Setup Tensorflow C++
# NOTE(review): "third-party" may be meant as "third_party" (the directory
# name used in the TensorFlow source tree) - confirm against the local install.
set(TENSORFLOW_INCLUDES
  /usr/local/include/tf/
  /usr/local/include/tf/bazel-genfiles
  /usr/local/include/tf/tensorflow/
  /usr/local/include/tf/third-party
  /usr/local/include/eigen3)
set(TENSORFLOW_LIBS
  /usr/local/lib/libtensorflow_cc.so
  /usr/local/lib/libtensorflow_framework.so)
include_directories(
  ${TENSORFLOW_INCLUDES}
  #${PROJECT_SOURCE_DIR}/third_party/eigen3
)

add_executable(yolov3_pb yolov3_pb.cpp )
target_link_libraries(yolov3_pb ${OpenCV_LIBS} ${TENSORFLOW_LIBS})