YoloX: From Training to OpenVINO C++ Deployment

This article walks through converting a trained YoloX model into OpenVINO's IR format and deploying it with C++ on Linux. It covers downloading and configuring the OpenVINO environment, converting the ONNX model to IR files, writing a CMakeLists.txt and building with cmake and make, and implementing model inference and result parsing in C++. It closes with an inference-performance comparison of OpenVINO C++ against Python inference on an Intel CPU.

Deployment steps
1. Download the OpenVINO toolkit
2. Configure the OpenVINO environment
3. Convert the exported intermediate ONNX model into the OpenVINO inference-engine IR format (.xml and .bin)
4. Write CMakeLists.txt, then build with cmake and make
5. Run prediction

Download the OpenVINO toolkit

Pick the toolkit that matches your operating system, fill in a short form, and you can download the build for that platform.

You can also choose which release to download; the latest version is selected by default.

Configure the OpenVINO environment

Official documentation
1. Temporary setup (applies to the current shell only)

source /opt/intel/openvino_2021/bin/setupvars.sh

2. Permanent setup (applies to every new shell)

vim ~/.bashrc
# append the following line at the end of ~/.bashrc, then reload it:
source /opt/intel/openvino_2021/bin/setupvars.sh
source ~/.bashrc

Note: on Linux, the toolkit is installed under the intel subdirectory of /opt.

Converting ONNX to IR

First enter the toolkit's model_optimizer directory and install the ONNX prerequisites; the Model Optimizer can then convert the ONNX model into the IR files (.xml and .bin).

cd /opt/intel/openvino_2021/deployment_tools/model_optimizer/install_prerequisites
./install_prerequisites_onnx.sh
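
The prerequisites script only installs the ONNX dependencies; the conversion itself is done with the Model Optimizer script mo.py. A minimal invocation, assuming the exported model is named yolox.onnx (adjust the model path, input shape, and output directory to your setup):

cd /opt/intel/openvino_2021/deployment_tools/model_optimizer
python3 mo.py --input_model yolox.onnx --input_shape [1,3,640,640] --output_dir .

This writes yolox.xml and yolox.bin into the current directory.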

Building with cmake and make

mkdir build
cd build
cmake ..
make

Place the CMakeLists.txt below next to yolox_openvino.cpp in the project root, and copy the generated .xml and .bin files into the build directory. From build, cmake .. generates the Makefile, and make then compiles it into the C++ executable yolox_openvino. Make sure setupvars.sh has been sourced in the same shell first, otherwise find_package(InferenceEngine) cannot locate OpenVINO's CMake configuration.

CMakeLists.txt

cmake_minimum_required(VERSION 3.4.1)
set(CMAKE_CXX_STANDARD 14)

project(yolox_openvino_demo)

find_package(OpenCV REQUIRED)
find_package(InferenceEngine REQUIRED)
find_package(ngraph REQUIRED)

include_directories(
    ${OpenCV_INCLUDE_DIRS}
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_BINARY_DIR}
)

add_executable(yolox_openvino yolox_openvino.cpp)

target_link_libraries(
    yolox_openvino
    ${InferenceEngine_LIBRARIES}
    ${NGRAPH_LIBRARIES}
    ${OpenCV_LIBS}
)
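
One optional note: the inference source below uses OpenMP pragmas in its quicksort. The CMakeLists above does not enable OpenMP, so those pragmas are silently ignored and the sort simply runs serially, which is harmless. If you do want the parallel path, a minimal addition would be (this sketch assumes CMake >= 3.9, which provides the imported OpenMP::OpenMP_CXX target):

find_package(OpenMP QUIET)
if(OpenMP_CXX_FOUND)
    target_link_libraries(yolox_openvino OpenMP::OpenMP_CXX)
endif()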

The C++ inference code

// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <iterator>
#include <memory>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <iostream>
#include <inference_engine.hpp>
#include <chrono>

using namespace InferenceEngine;
using namespace std;
using namespace cv;
/**
 * @brief Define names depending on Unicode path support
 */
#define tcout                  std::cout
#define file_name_t            std::string
#define imread_t               cv::imread
#define NMS_THRESH 0.45
#define BBOX_CONF_THRESH 0.3

static const int INPUT_W = 640;
static const int INPUT_H = 640;
static const int NUM_CLASSES = 80; // COCO has 80 classes. Modify this value for your own dataset.

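// Letterbox preprocessing: scale the image into 640x640 while preserving the
// aspect ratio, and pad the remaining right/bottom area with gray (114,114,114).
// main() recomputes the same ratio as `scale` so decode_outputs() can map the
// boxes back onto the original image.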
cv::Mat static_resize(cv::Mat &img) {
    float r = std::min(INPUT_W / (img.cols * 1.0), INPUT_H / (img.rows * 1.0));
    // r = std::min(r, 1.0f);
    int unpad_w = r * img.cols;
    int unpad_h = r * img.rows;
    cv::Mat re(unpad_h, unpad_w, CV_8UC3);
    cv::resize(img, re, re.size());
    //cv::Mat out(INPUT_W, INPUT_H, CV_8UC3, cv::Scalar(114, 114, 114));
    cv::Mat out(INPUT_H, INPUT_W, CV_8UC3, cv::Scalar(114, 114, 114));
    re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows)));
    return out;
}

void blobFromImage(cv::Mat &img, Blob::Ptr &blob) {
    int channels = 3;
    int img_h = img.rows;
    int img_w = img.cols;
    InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
    if (!mblob) {
        THROW_IE_EXCEPTION << "We expect blob to be inherited from MemoryBlob in blobFromImage, "
                           << "but in fact we were not able to cast inputBlob to MemoryBlob";
    }
    // locked memory holder should be alive all time while access to its buffer happens
    auto mblobHolder = mblob->wmap();

    float *blob_data = mblobHolder.as<float *>();

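    // Repack OpenCV's interleaved HWC (BGR) uint8 pixels into planar CHW floats.
    // Note there is no mean/std normalization: this matches the official YOLOX
    // OpenVINO demo, where the exported model expects raw 0-255 pixel values.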
    for (size_t c = 0; c < channels; c++) {
        for (size_t h = 0; h < img_h; h++) {
            for (size_t w = 0; w < img_w; w++) {
                blob_data[c * img_w * img_h + h * img_w + w] =
                        (float) img.at<cv::Vec3b>(h, w)[c];
            }
        }

    }
}


struct Object {
    cv::Rect_<float> rect;
    int label;
    float prob;
};

struct GridAndStride {
    int grid0;
    int grid1;
    int stride;
};

static void generate_grids_and_stride(const int target_w, const int target_h, std::vector<int> &strides,
                                      std::vector<GridAndStride> &grid_strides) {
    for (auto stride : strides) {
        int num_grid_w = target_w / stride;
        int num_grid_h = target_h / stride;
        for (int g1 = 0; g1 < num_grid_h; g1++) {
            for (int g0 = 0; g0 < num_grid_w; g0++) {
                grid_strides.push_back(GridAndStride{g0, g1, stride});
            }
        }
    }
}
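
// For a 640x640 input with strides {8, 16, 32} this produces
// 80*80 + 40*40 + 20*20 = 8400 grid cells, one per row of the exported YOLOX
// head output (each row holds 4 box terms + 1 objectness + NUM_CLASSES scores).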


static void
generate_yolox_proposals(const std::vector<GridAndStride> &grid_strides, const float *feat_ptr, float prob_threshold,
                         std::vector<Object> &objects) {

    const int num_anchors = grid_strides.size();

    for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) {
        const int grid0 = grid_strides[anchor_idx].grid0;
        const int grid1 = grid_strides[anchor_idx].grid1;
        const int stride = grid_strides[anchor_idx].stride;

        const int basic_pos = anchor_idx * (NUM_CLASSES + 5);

        // yolox/models/yolo_head.py decode logic
        //  outputs[..., :2] = (outputs[..., :2] + grids) * strides
        //  outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides
        float x_center = (feat_ptr[basic_pos + 0] + grid0) * stride;
        float y_center = (feat_ptr[basic_pos + 1] + grid1) * stride;
        float w = exp(feat_ptr[basic_pos + 2]) * stride;
        float h = exp(feat_ptr[basic_pos + 3]) * stride;
        float x0 = x_center - w * 0.5f;
        float y0 = y_center - h * 0.5f;

        float box_objectness = feat_ptr[basic_pos + 4];
        for (int class_idx = 0; class_idx < NUM_CLASSES; class_idx++) {
            float box_cls_score = feat_ptr[basic_pos + 5 + class_idx];
            float box_prob = box_objectness * box_cls_score;
            if (box_prob > prob_threshold) {
                Object obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = w;
                obj.rect.height = h;
                obj.label = class_idx;
                obj.prob = box_prob;

                objects.push_back(obj);
            }

        } // class loop

    } // point anchor loop
}

static inline float intersection_area(const Object &a, const Object &b) {
    cv::Rect_<float> inter = a.rect & b.rect;
    return inter.area();
}

static void qsort_descent_inplace(std::vector<Object> &faceobjects, int left, int right) {
    int i = left;
    int j = right;
    float p = faceobjects[(left + right) / 2].prob;

    while (i <= j) {
        while (faceobjects[i].prob > p)
            i++;

        while (faceobjects[j].prob < p)
            j--;

        if (i <= j) {
            // swap
            std::swap(faceobjects[i], faceobjects[j]);

            i++;
            j--;
        }
    }

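    // The sections below parallelize the two recursive halves when the target
    // is built with OpenMP (e.g. -fopenmp); without OpenMP the pragmas are
    // ignored and the sort runs serially.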
#pragma omp parallel sections
    {
#pragma omp section
        {
            if (left < j) qsort_descent_inplace(faceobjects, left, j);
        }
#pragma omp section
        {
            if (i < right) qsort_descent_inplace(faceobjects, i, right);
        }
    }
}


static void qsort_descent_inplace(std::vector<Object> &objects) {
    if (objects.empty())
        return;

    qsort_descent_inplace(objects, 0, objects.size() - 1);
}

static void nms_sorted_bboxes(const std::vector<Object> &faceobjects, std::vector<int> &picked, float nms_threshold) {
    picked.clear();

    const int n = faceobjects.size();

    std::vector<float> areas(n);
    for (int i = 0; i < n; i++) {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++) {
        const Object &a = faceobjects[i];

        int keep = 1;
        for (int j = 0; j < (int) picked.size(); j++) {
            const Object &b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            // float IoU = inter_area / union_area
            if (inter_area / union_area > nms_threshold)
                keep = 0;
        }

        if (keep)
            picked.push_back(i);
    }
}


static void
decode_outputs(const float *prob, std::vector<Object> &objects, float scale, const int img_w, const int img_h) {
    std::vector<Object> proposals;
    std::vector<int> strides = {8, 16, 32};
    std::vector<GridAndStride> grid_strides;

    generate_grids_and_stride(INPUT_W, INPUT_H, strides, grid_strides);
    generate_yolox_proposals(grid_strides, prob, BBOX_CONF_THRESH, proposals);
    qsort_descent_inplace(proposals);

    std::vector<int> picked;
    nms_sorted_bboxes(proposals, picked, NMS_THRESH);
    int count = picked.size();
    objects.resize(count);

    for (int i = 0; i < count; i++) {
        objects[i] = proposals[picked[i]];

        // adjust offset to original unpadded
        float x0 = (objects[i].rect.x) / scale;
        float y0 = (objects[i].rect.y) / scale;
        float x1 = (objects[i].rect.x + objects[i].rect.width) / scale;
        float y1 = (objects[i].rect.y + objects[i].rect.height) / scale;

        // clip
        x0 = std::max(std::min(x0, (float) (img_w - 1)), 0.f);
        y0 = std::max(std::min(y0, (float) (img_h - 1)), 0.f);
        x1 = std::max(std::min(x1, (float) (img_w - 1)), 0.f);
        y1 = std::max(std::min(y1, (float) (img_h - 1)), 0.f);

        objects[i].rect.x = x0;
        objects[i].rect.y = y0;
        objects[i].rect.width = x1 - x0;
        objects[i].rect.height = y1 - y0;
    }
}

const float color_list[80][3] =
        {
                {0.000, 0.447, 0.741},
                {0.850, 0.325, 0.098},
                {0.929, 0.694, 0.125},
                {0.494, 0.184, 0.556},
                {0.466, 0.674, 0.188},
                {0.301, 0.745, 0.933},
                {0.635, 0.078, 0.184},
                {0.300, 0.300, 0.300},
                {0.600, 0.600, 0.600},
                {1.000, 0.000, 0.000},
                {1.000, 0.500, 0.000},
                {0.749, 0.749, 0.000},
                {0.000, 1.000, 0.000},
                {0.000, 0.000, 1.000},
                {0.667, 0.000, 1.000},
                {0.333, 0.333, 0.000},
                {0.333, 0.667, 0.000},
                {0.333, 1.000, 0.000},
                {0.667, 0.333, 0.000},
                {0.667, 0.667, 0.000},
                {0.667, 1.000, 0.000},
                {1.000, 0.333, 0.000},
                {1.000, 0.667, 0.000},
                {1.000, 1.000, 0.000},
                {0.000, 0.333, 0.500},
                {0.000, 0.667, 0.500},
                {0.000, 1.000, 0.500},
                {0.333, 0.000, 0.500},
                {0.333, 0.333, 0.500},
                {0.333, 0.667, 0.500},
                {0.333, 1.000, 0.500},
                {0.667, 0.000, 0.500},
                {0.667, 0.333, 0.500},
                {0.667, 0.667, 0.500},
                {0.667, 1.000, 0.500},
                {1.000, 0.000, 0.500},
                {1.000, 0.333, 0.500},
                {1.000, 0.667, 0.500},
                {1.000, 1.000, 0.500},
                {0.000, 0.333, 1.000},
                {0.000, 0.667, 1.000},
                {0.000, 1.000, 1.000},
                {0.333, 0.000, 1.000},
                {0.333, 0.333, 1.000},
                {0.333, 0.667, 1.000},
                {0.333, 1.000, 1.000},
                {0.667, 0.000, 1.000},
                {0.667, 0.333, 1.000},
                {0.667, 0.667, 1.000},
                {0.667, 1.000, 1.000},
                {1.000, 0.000, 1.000},
                {1.000, 0.333, 1.000},
                {1.000, 0.667, 1.000},
                {0.333, 0.000, 0.000},
                {0.500, 0.000, 0.000},
                {0.667, 0.000, 0.000},
                {0.833, 0.000, 0.000},
                {1.000, 0.000, 0.000},
                {0.000, 0.167, 0.000},
                {0.000, 0.333, 0.000},
                {0.000, 0.500, 0.000},
                {0.000, 0.667, 0.000},
                {0.000, 0.833, 0.000},
                {0.000, 1.000, 0.000},
                {0.000, 0.000, 0.167},
                {0.000, 0.000, 0.333},
                {0.000, 0.000, 0.500},
                {0.000, 0.000, 0.667},
                {0.000, 0.000, 0.833},
                {0.000, 0.000, 1.000},
                {0.000, 0.000, 0.000},
                {0.143, 0.143, 0.143},
                {0.286, 0.286, 0.286},
                {0.429, 0.429, 0.429},
                {0.571, 0.571, 0.571},
                {0.714, 0.714, 0.714},
                {0.857, 0.857, 0.857},
                {0.000, 0.447, 0.741},
                {0.314, 0.717, 0.741},
                {0.50,  0.5,   0}
        };

static void draw_objects(const cv::Mat &bgr, const std::vector<Object> &objects) {
    static const char *class_names[] = {
            "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
            "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
            "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
            "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
            "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
            "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
            "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
            "cell phone",
            "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
            "hair drier", "toothbrush"
    };

    cv::Mat image = bgr.clone();

    for (size_t i = 0; i < objects.size(); i++) {
        const Object &obj = objects[i];

        fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
                obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

        cv::Scalar color = cv::Scalar(color_list[obj.label][0], color_list[obj.label][1], color_list[obj.label][2]);
        float c_mean = cv::mean(color)[0];
        cv::Scalar txt_color;
        if (c_mean > 0.5) {
            txt_color = cv::Scalar(0, 0, 0);
        } else {
            txt_color = cv::Scalar(255, 255, 255);
        }

        cv::rectangle(image, obj.rect, color * 255, 2);

        char text[256];
        sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);

        int baseLine = 0;
        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);

        cv::Scalar txt_bk_color = color * 0.7 * 255;

        int x = obj.rect.x;
        int y = obj.rect.y + 1;
        //int y = obj.rect.y - label_size.height - baseLine;
        if (y > image.rows)
            y = image.rows;
        //if (x + label_size.width > image.cols)
        //x = image.cols - label_size.width;

        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                      txt_bk_color, -1);

        cv::putText(image, text, cv::Point(x, y + label_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, 0.4, txt_color, 1);
    }

    cv::imwrite("_demo.jpg", image);
    fprintf(stderr, "save vis file\n");
    /* cv::imshow("image", image); */
    /* cv::waitKey(0); */
}


int main(int argc, char *argv[]) {
    try {
        // ------------------------------ Parsing and validation of input arguments
        // ---------------------------------
        if (argc != 4) {
            tcout << "Usage : " << argv[0] << " <path_to_model> <path_to_image> <device_name>" << std::endl;
            return EXIT_FAILURE;
        }

        const file_name_t input_model{argv[1]};
        const file_name_t input_image_path{argv[2]};
        const std::string device_name{argv[3]};
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- Step 1. Initialize inference engine core
        // -------------------------------------
        Core ie;
        // -----------------------------------------------------------------------------------------------------

        // Step 2. Read a model in OpenVINO Intermediate Representation (.xml and
        // .bin files) or ONNX (.onnx file) format
        CNNNetwork network = ie.ReadNetwork(input_model);
        if (network.getOutputsInfo().size() != 1)
            throw std::logic_error("Sample supports topologies with 1 output only");
        if (network.getInputsInfo().size() != 1)
            throw std::logic_error("Sample supports topologies with 1 input only");
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- Step 3. Configure input & output
        // ---------------------------------------------
        // --------------------------- Prepare input blobs
        // -----------------------------------------------------
        InputInfo::Ptr input_info = network.getInputsInfo().begin()->second;
        std::string input_name = network.getInputsInfo().begin()->first;

        /* Mark input as resizable by setting of a resize algorithm.
         * In this case we will be able to set an input blob of any shape to an
         * infer request. Resize and layout conversions are executed automatically
         * during inference */
        //input_info->getPreProcess().setResizeAlgorithm(RESIZE_BILINEAR);
        //input_info->setLayout(Layout::NHWC);
        //input_info->setPrecision(Precision::FP32);

        // --------------------------- Prepare output blobs
        // ----------------------------------------------------
        if (network.getOutputsInfo().empty()) {
            std::cerr << "Network outputs info is empty" << std::endl;
            return EXIT_FAILURE;
        }
        DataPtr output_info = network.getOutputsInfo().begin()->second;
        std::string output_name = network.getOutputsInfo().begin()->first;

        output_info->setPrecision(Precision::FP32);
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- Step 4. Loading a model to the device
        // ------------------------------------------
        ExecutableNetwork executable_network = ie.LoadNetwork(network, device_name);
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- Step 5. Create an infer request
        // -------------------------------------------------
        InferRequest infer_request = executable_network.CreateInferRequest();
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- Step 6. Prepare input
        // --------------------------------------------------------
        /* Read input image to a blob and set it to an infer request without resize
         * and layout conversions. */
        cv::Mat image = imread_t(input_image_path);
        cv::Mat pr_img = static_resize(image);
        Blob::Ptr imgBlob = infer_request.GetBlob(input_name);     // just wrap Mat data by Blob::Ptr
        blobFromImage(pr_img, imgBlob);

        // infer_request.SetBlob(input_name, imgBlob);  // infer_request accepts input blob of any size
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- Step 7. Do inference
        // --------------------------------------------------------
        /* Running the request synchronously, timing just the inference call */
        auto start = chrono::high_resolution_clock::now();
        infer_request.Infer();
        auto end = chrono::high_resolution_clock::now();
        std::chrono::duration<double> diff = end - start;
        cout << "inference time: " << diff.count() << "s" << endl;
        // -----------------------------------------------------------------------------------------------------

        // --------------------------- Step 8. Process output
        // ------------------------------------------------------
        const Blob::Ptr output_blob = infer_request.GetBlob(output_name);
        MemoryBlob::CPtr moutput = as<MemoryBlob>(output_blob);
        if (!moutput) {
            throw std::logic_error("We expect output to be inherited from MemoryBlob, "
                                   "but by fact we were not able to cast output to MemoryBlob");
        }
        // locked memory holder should be alive all time while access to its buffer
        // happens
        auto moutputHolder = moutput->rmap();
        const float *net_pred = moutputHolder.as<const PrecisionTrait<Precision::FP32>::value_type *>();

        int img_w = image.cols;
        int img_h = image.rows;
        float scale = std::min(INPUT_W / (image.cols * 1.0), INPUT_H / (image.rows * 1.0));
        std::vector<Object> objects;

        decode_outputs(net_pred, objects, scale, img_w, img_h);
        draw_objects(image, objects);
        // -----------------------------------------------------------------------------------------------------
    } catch (const std::exception &ex) {
        std::cerr << ex.what() << std::endl;
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
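
Prediction (step 5) is then a single command: the program takes the IR .xml path, an image path, and a device name, matching the argument parsing in main(). A sketch, assuming yolox.xml/yolox.bin and a test image demo.jpg sit in the build directory:

./yolox_openvino yolox.xml demo.jpg CPU

The annotated result is written to _demo.jpg next to the binary.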

Cloud-side deployment: OpenVINO vs. Python inference performance on an Intel CPU


Continuously updated…
