YOLOV3/V4 pb模型推理 C++

最新推荐文章于 2022-01-09 21:43:01 发布

置顶 hanqu3456

最新推荐文章于 2022-01-09 21:43:01 发布

阅读量650

点赞数 2

分类专栏： yolo-pb推理　文章标签： 深度学习 tensorflow

本文链接：https://blog.csdn.net/hanqu3456/article/details/110680303

版权

yolo-pb推理专栏收录该内容

1 篇文章 0 订阅

订阅专栏

YOLO系列PB模型推理C++,支持V3、V4

模型转换（weights->pb）

weights code： https://github.com/AlexeyAB/darknet
转换code： https://github.com/TNTWEN/OpenVINO-YOLOV4
weights转换为pb模型时，参照转换code的py文件操作即可。

inference cpp代码

// 编译tensorflow详见其他博客

#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <cmath>
#include <exception>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>


#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/graph/default_device.h"
#include "tensorflow/core/graph/graph_def_builder.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/init_main.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
#include "tensorflow/core/util/command_line_flags.h"

// These are all common classes it's handy to reference with no namespace.
// NOTE(review): `Flag` and `Softmax` are not referenced anywhere in the
// visible part of this file.
using tensorflow::Flag;
using tensorflow::Tensor;
using tensorflow::Status;
using tensorflow::string;
using tensorflow::int32;
using tensorflow::ops::Softmax;


// Debug helper: streams tensor T, viewed as a float tensor of rank d, to
// stdout. The expansion already ends with ';'. Not used in the visible code.
#define printTensor(T, d) \
    std::cout<< (T).tensor<float, (d)>() << std::endl;

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

// Size of the last dimension of the input tensor built in main().
#define IMG_CHANNELS    3
// Class names, one per line of the classes file; filled in main() and read by
// drawPred() to label boxes.
std::vector<string> classes;
// Numeric values parsed from the config file, one per line; main() reads
// entries 0..3 as confidence threshold, NMS threshold, input width and input
// height.
std::vector<float>config; 



// Splits `s` on every occurrence of the delimiter `c` and appends the pieces
// to `v` (whatever `v` already holds is kept).
//
// - Empty pieces between adjacent delimiters and before a leading delimiter
//   are kept; a piece after a trailing delimiter is not emitted.
// - An empty `s` appends nothing.
// - An empty delimiter is treated as "delimiter not present": the whole of
//   `s` is appended as a single piece (if non-empty).
void SplitString(const string& s, std::vector<string>& v, const string& c)
{
    // find("") matches at every position without advancing, so the loop below
    // would never terminate for an empty delimiter.
    if (c.empty())
    {
        if (!s.empty())
            v.push_back(s);
        return;
    }

    string::size_type start = 0;
    for (string::size_type hit = s.find(c); hit != string::npos; hit = s.find(c, start))
    {
        v.push_back(s.substr(start, hit - start));
        start = hit + c.size();
    }

    // Remaining tail after the last delimiter (skipped when the string ends
    // exactly on a delimiter).
    if (start != s.length())
        v.push_back(s.substr(start));
}
// Appends "<path>/<entry>" to `filenames` for every regular file directly
// inside the directory `path` (symlinks are followed by stat()).
//
// "." and "..", sub-directories and any other non-regular entries are
// skipped, so callers can hand every returned name straight to an image
// reader. The order is whatever readdir() yields. If the directory cannot be
// opened a message is printed and `filenames` is left untouched.
void GetFileNames(string path,std::vector<string>& filenames)
{
    DIR* dir = opendir(path.c_str());
    if (dir == nullptr) {
        std::cout<<"Folder doesn't Exist!"<<std::endl;
        return;
    }

    while (dirent* entry = readdir(dir)) {
        const string name = entry->d_name;
        if (name == "." || name == "..") {
            continue;
        }

        const string full_path = path + "/" + name;
        struct stat info;
        // Entries that cannot be stat'ed or are not regular files (e.g.
        // sub-directories) are not usable as input files.
        if (stat(full_path.c_str(), &info) != 0 || !S_ISREG(info.st_mode)) {
            continue;
        }
        filenames.push_back(full_path);
    }
    closedir(dir);
}

// Reads a binary GraphDef (.pb) from `graph_file_name`, creates a new
// TensorFlow session with default options and loads the graph into it.
//
// On success `*session` owns the ready-to-run session and OK is returned.
// Errors:
//   - NotFound  if the file cannot be read or parsed (the underlying status
//               text is appended so the real cause is not lost);
//   - Internal  if TensorFlow fails to create a session object;
//   - whatever Session::Create() reports if the graph cannot be loaded.
Status LoadGraph(const string& graph_file_name,
                 std::unique_ptr<tensorflow::Session>* session) {
  tensorflow::GraphDef graph_def;
  const Status read_status =
      ReadBinaryProto(tensorflow::Env::Default(), graph_file_name, &graph_def);
  if (!read_status.ok()) {
    return tensorflow::errors::NotFound("Failed to load compute graph at '",
                                        graph_file_name, "': ",
                                        read_status.ToString());
  }

  session->reset(tensorflow::NewSession(tensorflow::SessionOptions()));
  // NewSession() returns nullptr on failure; calling Create() on it would
  // crash.
  if (*session == nullptr) {
    return tensorflow::errors::Internal(
        "Failed to create a TensorFlow session for '", graph_file_name, "'");
  }

  return (*session)->Create(graph_def);
}

// Letterboxes `input` into a `width` x `height` image: the image is scaled
// uniformly so that it fits inside the target, then centred and padded with
// grey (128,128,128) borders. When the padding is odd, the extra pixel goes
// to the bottom / right side.
//
// Throws cv::Exception (via CV_Assert) if `input` is empty or the target size
// is not positive.
cv::Mat resizeKeepAspectRatio(const cv::Mat &input, int width, int height)
{
    CV_Assert(!input.empty());
    CV_Assert(width > 0 && height > 0);

    // Height obtained when fitting to the target width, and width obtained
    // when fitting to the target height.
    const double fit_width_h = width * (input.rows / static_cast<double>(input.cols));
    const double fit_height_w = height * (input.cols / static_cast<double>(input.rows));

    cv::Size scaled = (fit_width_h <= height)
                          ? cv::Size(width, static_cast<int>(fit_width_h))
                          : cv::Size(static_cast<int>(fit_height_w), height);

    // Very elongated images can truncate to a zero-sized side, which
    // cv::resize rejects; keep at least one pixel.
    if (scaled.width < 1) scaled.width = 1;
    if (scaled.height < 1) scaled.height = 1;

    cv::Mat output;
    cv::resize(input, output, scaled);

    const int pad_h = height - output.rows;
    const int pad_w = width - output.cols;
    const int top = pad_h / 2;
    const int down = (pad_h + 1) / 2;
    const int left = pad_w / 2;
    const int right = (pad_w + 1) / 2;

    cv::copyMakeBorder(output, output, top, down, left, right, cv::BORDER_CONSTANT, cv::Scalar(128,128,128) );

    return output;
}

// Copies the pixels of `mat` into `outTensor` as 32-bit floats (values are
// converted, not rescaled).
//
// `outTensor` must already be allocated as a DT_FLOAT tensor holding exactly
// rows * cols * 3 elements; the pixel data is written directly into its
// buffer in row-major, channel-interleaved order.
//
// Returns InvalidArgument if `mat` is empty, is not a 3-channel 2-D image, or
// does not match the tensor's type/size; Internal if OpenCV did not write
// into the tensor buffer.
//
// The data is converted in place, so no TensorFlow graph or session is needed
// here (running the tensor through an Identity op would return the same
// values at the cost of a session per frame).
Status readTensorFromMat(const cv::Mat &mat, Tensor &outTensor) {
    const int kChannels = 3;  // matches CV_32FC3 below

    if (mat.empty() || mat.dims != 2 || mat.channels() != kChannels) {
        return tensorflow::errors::InvalidArgument(
            "readTensorFromMat expects a non-empty 3-channel 2-D image");
    }
    if (outTensor.dtype() != tensorflow::DT_FLOAT) {
        return tensorflow::errors::InvalidArgument(
            "readTensorFromMat expects a DT_FLOAT tensor");
    }

    // Writing through a mismatched buffer would overflow (tensor too small)
    // or leave part of it uninitialised (tensor too large).
    const long long expected =
        static_cast<long long>(mat.rows) * mat.cols * kChannels;
    if (static_cast<long long>(outTensor.NumElements()) != expected) {
        return tensorflow::errors::InvalidArgument(
            "Tensor holds ", static_cast<long long>(outTensor.NumElements()),
            " elements but the image needs ", expected);
    }

    // Wrap the tensor memory in a Mat header so convertTo() writes straight
    // into the tensor.
    float *p = outTensor.flat<float>().data();
    cv::Mat fakeMat(mat.rows, mat.cols, CV_32FC3, p);
    mat.convertTo(fakeMat, CV_32FC3, 1.f);

    // convertTo() reallocates its destination if size/type do not match; in
    // that case nothing was written to the tensor.
    if (fakeMat.data != reinterpret_cast<uchar *>(p)) {
        return tensorflow::errors::Internal(
            "OpenCV reallocated the destination instead of filling the tensor");
    }
    return Status::OK();
}

// Draw the predicted bounding box
void drawPred(int classId, float conf, int left, int top, int right, int bottom, cv::Mat& frame)
{
    //Draw a rectangle displaying the bounding box
    cv::rectangle(frame, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(255, 178, 50), 2);
    
    //Get the label for the class name and its confidence
    string label = cv::format("%.2f", conf);
    if (!classes.empty())
    {
        label = classes[classId] + ":" + label;
    }
    
    //Display the label at the top of the bounding box
    int baseLine;
    cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    top = cv::max(top, labelSize.height);
    cv::rectangle(frame, cv::Point(left, top - round(1.5*labelSize.height)), 
                  cv::Point(left + round(1.5*labelSize.width), top + baseLine), cv::Scalar(255, 255, 255), cv::FILLED);
    cv::putText(frame, label, cv::Point(left, top), cv::FONT_HERSHEY_SIMPLEX, 0.75, cv::Scalar(0,0,0),1);
}


// Filters the raw network output, applies non-maxima suppression, draws the
// surviving boxes on `frame` and writes them to the text file `txt_str`.
//
// Each row of every matrix in `outs` is read as
//   [x0, y0, x1, y1, objectness, class score 0, class score 1, ...]
// with coordinates in letterboxed network-input pixels.
// NOTE(review): only one `input_size` is taken, so the letterbox is assumed
// to be square (input_size x input_size) - confirm for non-square models.
//
// - Rows whose objectness (column 4) is not above `bboxThreshold` are dropped.
// - Kept boxes are mapped back to `frame` coordinates by undoing the
//   letterbox scale and padding.
// - The class with the highest score labels the box and that score is the
//   confidence passed to NMS (score threshold `bboxThreshold`, IoU threshold
//   `nmsThreshold`).
// - One line per final box is written: "classId conf left top right bottom".
//   If the file cannot be opened a warning is printed and the boxes are still
//   drawn.
// Nothing is done for an empty `frame`.
void postprocess(cv::Mat& frame, const std::vector<cv::Mat>& outs,float bboxThreshold,float nmsThreshold, int input_size,string txt_str)
{
    if (frame.empty())
    {
        return;
    }

    std::vector<int> classIds;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // Letterbox parameters depend only on the frame and input size, so they
    // are computed once instead of per detection.
    const float final_ratio = std::min((float)input_size / frame.cols, (float)input_size / frame.rows);
    const int padx = static_cast<int>(0.5f * (input_size - final_ratio * frame.cols));
    const int pady = static_cast<int>(0.5f * (input_size - final_ratio * frame.rows));

    for (size_t i = 0; i < outs.size(); ++i)
    {
        const cv::Mat& out = outs[i];
        // Need 4 box values + objectness + at least one class score.
        if (out.cols <= 5)
        {
            continue;
        }

        for (int j = 0; j < out.rows; ++j)
        {
            const float* data = out.ptr<float>(j);
            if (!(data[4] > bboxThreshold))
            {
                continue;
            }

            // Get the value and location of the maximum class score.
            cv::Mat scores = out.row(j).colRange(5, out.cols);
            cv::Point classIdPoint;
            double confidence = 0.0;
            cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);

            // Recover the box in original-frame coordinates.
            const int x0 = static_cast<int>(((int)(data[0]) - padx) / final_ratio);
            const int y0 = static_cast<int>(((int)(data[1]) - pady) / final_ratio);
            const int x1 = static_cast<int>(((int)(data[2]) - padx) / final_ratio);
            const int y1 = static_cast<int>(((int)(data[3]) - pady) / final_ratio);

            classIds.push_back(classIdPoint.x);
            confidences.push_back((float)confidence);
            boxes.push_back(cv::Rect(x0, y0, x1 - x0, y1 - y0));
        }
    }

    // Perform non maximum suppression to eliminate redundant overlapping boxes
    // with lower confidences.
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, bboxThreshold, nmsThreshold, indices);

    FILE *fp = fopen(txt_str.c_str(),"w");
    if (NULL == fp)
    {
        std::cerr << "Unable to open " << txt_str << " for writing" << std::endl;
    }

    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];
        const cv::Rect box = boxes[idx];
        drawPred(classIds[idx], confidences[idx], box.x, box.y,
                 box.x + box.width, box.y + box.height, frame);
        if (NULL != fp)
        {
            fprintf(fp,"%d %.2f %d %d %d %d\n",classIds[idx],confidences[idx],box.x, box.y,
                    box.x + box.width, box.y + box.height);
        }
    }

    if (NULL != fp)
    {
        fclose(fp);
    }
}

int main(int argc, char* argv[]) {
  string video_path;
  string imgs_path;
  string pb_path;
  string config_file;
  string classesFile;
  bool img_detect_flag = false;
  bool video_detect_flag = false;
  int input_size_w,input_size_h;
  float confThreshold;
  float nmsThreshold;
  if (argc == 9 && std::string(argv[1]) == "-v3") {
		pb_path =  std::string(argv[2]);
    if(std::string(argv[3]) == "-n"){
      classesFile = std::string(argv[4]);
    	if(std::string(argv[5]) == "-c"){
        config_file = std::string(argv[6]);	
        if(std::string(argv[7]) == "-i"){
          img_detect_flag = true;
          imgs_path = std::string(argv[8]);
        }
        else if(std::string(argv[7]) == "-v"){
          video_detect_flag = true;
          video_path = std::string(argv[8]);
        }
      }
    }
  }
  else if (argc == 9 && std::string(argv[1]) == "-v4") {
		pb_path =  std::string(argv[2]);
    if(std::string(argv[3]) == "-n"){
      classesFile = std::string(argv[4]);
    	if(std::string(argv[5]) == "-c"){
        config_file = std::string(argv[6]);	
        if(std::string(argv[7]) == "-i"){
          img_detect_flag = true;
          imgs_path = std::string(argv[8]);
        }
        else if(std::string(argv[7]) == "-v"){
          video_detect_flag = true;
          video_path = std::string(argv[8]);
        }
      }
    }
  }
  else {
        std::cerr << "arguments not right!" << std::endl;
        std::cerr << "./yolo_pb -v3 ./yolov3.pb -n ./coco.names -c ./config.ini -i ./imgs   // input the yolov3-pb model and the images path to  run inference" << std::endl;
        std::cerr << "./yolo_pb -v3 ./yolov3.pb -n ./coco.names -c ./config.ini -v ./video/test.mp4  // input yolov3-pb model and the video path to run inference" << std::endl;
        std::cerr << "./yolo_pb -v4 ./yolov4.pb -n ./coco.names -c ./config.ini -i ./imgs // input the yolov4-pb model and the images path to  run inference" << std::endl;
        std::cerr << "./yolo_pb -v4 ./yolov4.pb -n ./coco.names -c ./config.ini -v ./video/test.mp4  // input yolov4-pb model and the video path to run inference" << std::endl;
        return -1;
    }

  string input_layer = "inputs"; //input ops
  string final_out = "output_boxes"; //output ops
  string root_dir = "";

  std::ifstream infile; 
  infile.open(config_file.data()); 
  assert(infile.is_open() && "Unable to load config file.");  
  try{
    string s;
    while(getline(infile,s))
    {
      config.push_back(std::stof(s));
    }
    infile.close();           
    confThreshold = config[0];
    nmsThreshold = config[1];
    input_size_w = (int)config[2];
    input_size_h = (int)config[3];
  }
  catch(const char* msg)
  {
    std::cout<<"the config file is not right, this program has stopped"<<msg<<std::endl;
    return -1;
  }
   
  std::ifstream ifs(classesFile.c_str());
  assert(ifs.is_open() && "Unable to load classes file.");
  string line;
  while (getline(ifs, line)) classes.push_back(line);

  // We need to call this to set up global state for TensorFlow.
  /*tensorflow::port::InitMain(argv[0], &argc, &argv);
  if (argc > 1) {
    LOG(ERROR) << "Unknown argument " << argv[1] << "\n";
    return -1;
  }*/

  // First we load and initialize the model.
  std::unique_ptr<tensorflow::Session> session;
  string graph_path = tensorflow::io::JoinPath(root_dir, pb_path);
  Status load_graph_status = LoadGraph(graph_path, &session);
  if (!load_graph_status.ok()) {
    LOG(ERROR) << load_graph_status;
    return -1;
  }

  // detect the imgs
  if(img_detect_flag)
  {
    std::cout<<"detect img"<<std::endl;
    std::vector<string>imgs_file;
		GetFileNames(imgs_path,imgs_file);
		std::vector<string> v1;
		std::vector<string> v2;
		for(int i=0;i<imgs_file.size();i++)
		{
			cv::Mat srcImage = cv::imread(imgs_file[i]);
			SplitString(imgs_file[i], v1,"."); 
			SplitString(v1[v1.size()-2], v2,"/"); 
			string txt_str = "./outputs/" + v2[v2.size()-1] + "_detected.txt";
			string img_str = "./outputs/" + v2[v2.size()-1] + "_detected.jpg";
			cv::Mat rgbImage;
      cv::cvtColor(srcImage, rgbImage, cv::COLOR_BGR2RGB);
      cv::Mat padImage = resizeKeepAspectRatio(rgbImage, input_size_w, input_size_h);

      Tensor resized_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({1, input_size_w,
                                input_size_h, IMG_CHANNELS}));
      Status read_tensor_status = readTensorFromMat(padImage, resized_tensor);
      if (!read_tensor_status.ok()) {
        LOG(ERROR) << read_tensor_status;
        return -1;
      }
      // Actually run the image through the model.
      std::vector<Tensor> outputs;
      Status run_status = session->Run({{input_layer, resized_tensor}},
                                     {final_out}, {}, &outputs);
      if (!run_status.ok()) {
        LOG(ERROR) << "Running model failed: " << run_status;
        return -1;
      }
    //std::cout << outputs[0].shape() << "\n";
      float *p = outputs[0].flat<float>().data();
      cv::Mat result(outputs[0].dim_size(1), outputs[0].dim_size(2), CV_32FC(1), p);
      std::vector<cv::Mat> outs;
      outs.push_back (result);
      postprocess(rgbImage, outs,confThreshold,nmsThreshold,input_size_w,txt_str);
      cv::cvtColor(rgbImage, srcImage , cv::COLOR_RGB2BGR);   
      cv::imwrite(img_str, srcImage);
    }
		  v1.clear();
		  v2.clear();
      return 1;
  }

  // detect the video
  else if(video_detect_flag)
  {
    cv::VideoCapture capture;
		cv::Mat srcImage;
		capture.open(video_path);
		if(!capture.isOpened())
		{
			printf("can not open ...\n");
			return -1;
		}
		int num = 1;
		 while (capture.read(srcImage))
		{
      string txt_str =  "./outputs/"+ std::to_string(num) + "_detected.txt";
      string img_str = "./outputs/"+ std::to_string(num) + "_detected.jpg";
			cv::Mat rgbImage;
      cv::cvtColor(srcImage, rgbImage, cv::COLOR_BGR2RGB);
      cv::Mat padImage = resizeKeepAspectRatio(rgbImage, input_size_w, input_size_h);

      Tensor resized_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({1, input_size_w, input_size_h, IMG_CHANNELS}));
      Status read_tensor_status = readTensorFromMat(padImage, resized_tensor);
      if (!read_tensor_status.ok()) {
        LOG(ERROR) << read_tensor_status;
        return -1;
      }
      // Actually run the image through the model.
      std::vector<Tensor> outputs;
      Status run_status = session->Run({{input_layer, resized_tensor}},
                                     {final_out}, {}, &outputs);
      if (!run_status.ok()) {
        LOG(ERROR) << "Running model failed: " << run_status;
        return -1;
      }
    //std::cout << outputs[0].shape() << "\n";
      float *p = outputs[0].flat<float>().data();
      cv::Mat result(outputs[0].dim_size(1), outputs[0].dim_size(2), CV_32FC(1), p);
      std::vector<cv::Mat> outs;
      outs.push_back (result);
      postprocess(rgbImage, outs,confThreshold,nmsThreshold,input_size_w, txt_str);
      cv::cvtColor(rgbImage, srcImage , cv::COLOR_RGB2BGR);   
      cv::imwrite(img_str, srcImage );
      num++;
    }
  	capture.release();
    return 1; 
  }
}

CMakeLists.txt

cmake_minimum_required(VERSION 3.5)
project(opencv_yolo)

# Put libraries under ./lib and the executable next to the sources.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/lib)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

add_definitions(-std=c++11)
# option() takes <variable> "<help text>" [value]; must be set before
# find_package(CUDA).
option(CUDA_USE_STATIC_CUDA_RUNTIME "Link the static CUDA runtime" OFF)
set(CMAKE_CXX_STANDARD 11)
# "Release" must be spelled exactly, otherwise no optimisation flags apply.
set(CMAKE_BUILD_TYPE Release)
find_package(CUDA REQUIRED)
message(STATUS "CUDA版本: ${CUDA_VERSION}")
message(STATUS "    头文件目录：${CUDA_INCLUDE_DIRS}")
message(STATUS "    库文件列表：${CUDA_LIBRARIES}")
# CUDA_NVCC_FLAGS is the variable FindCUDA reads for extra nvcc options.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-std=c++11;-g;-G;-gencode;arch=compute_75;code=sm_75)

set(OpenCV_DIR /opt/opencv440/lib/cmake/opencv4/)
find_package(OpenCV REQUIRED)
include_directories(
        ../include
        ${OpenCV_INCLUDE_DIRS})
message(${OpenCV_DIR})
message(STATUS ${OpenCV_LIBS})

# Setup Tensorflow C++
# NOTE(review): TensorFlow's source tree names this directory "third_party";
# confirm that "third-party" matches the local install layout.
set(TENSORFLOW_INCLUDES
        /usr/local/include/tf/
        /usr/local/include/tf/bazel-genfiles
        /usr/local/include/tf/tensorflow/
        /usr/local/include/tf/third-party
        /usr/local/include/eigen3)

set(TENSORFLOW_LIBS
        /usr/local/lib/libtensorflow_cc.so
        /usr/local/lib/libtensorflow_framework.so)

include_directories(
        ${TENSORFLOW_INCLUDES}
        #${PROJECT_SOURCE_DIR}/third_party/eigen3
)

add_executable(yolov3_pb yolov3_pb.cpp )
target_link_libraries(yolov3_pb ${OpenCV_LIBS} ${TENSORFLOW_LIBS})